xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 12edc857407aaf0aa7a6c094ef0e6aef4d7f191b)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
117 {
118   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
119   PC_IS          *pcis = (PC_IS*)pc->data;
120   VecType        impVecType;
121   PetscInt       n_constraints,n_R,old_size;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   if (!pcbddc->ConstraintMatrix) {
126     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
127   }
128   /* get sizes */
129   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
130   n_R = pcis->n-pcbddc->n_actual_vertices;
131   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
132   /* local work vectors (try to avoid unneeded work)*/
133   /* R nodes */
134   old_size = -1;
135   if (pcbddc->vec1_R) {
136     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
137   }
138   if (n_R != old_size) {
139     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
140     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
141     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
143     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
144     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
145   }
146   /* local primal dofs */
147   old_size = -1;
148   if (pcbddc->vec1_P) {
149     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
150   }
151   if (pcbddc->local_primal_size != old_size) {
152     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
153     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
155     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
156   }
157   /* local explicit constraints */
158   old_size = -1;
159   if (pcbddc->vec1_C) {
160     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
161   }
162   if (n_constraints && n_constraints != old_size) {
163     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
164     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
166     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "PCBDDCSetUpCorrection"
173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
174 {
175   PetscErrorCode         ierr;
176   /* pointers to pcis and pcbddc */
177   PC_IS*                 pcis = (PC_IS*)pc->data;
178   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
179   /* submatrices of local problem */
180   Mat                    A_RV,A_VR,A_VV;
181   /* working matrices */
182   Mat                    M1,M2,M3,C_CR;
183   /* working vectors */
184   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
185   /* additional working stuff */
186   IS                     is_aux;
187   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
188   const PetscScalar      *array,*row_cmat_values;
189   const PetscInt         *row_cmat_indices,*idx_R_local;
190   PetscInt               *idx_V_B,*auxindices;
191   PetscInt               n_vertices,n_constraints,size_of_constraint;
192   PetscInt               i,j,n_R,n_D,n_B;
193   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
194   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
195   MatType                impMatType;
196   /* some shortcuts to scalars */
197   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
198   /* for debugging purposes */
199   PetscReal              *coarsefunctions_errors,*constraints_errors;
200 
201   PetscFunctionBegin;
202   /* get number of vertices (corners plus constraints with change of basis)
203      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
204   n_vertices = pcbddc->n_actual_vertices;
205   n_constraints = pcbddc->local_primal_size-n_vertices;
206   /* Set Non-overlapping dimensions */
207   n_B = pcis->n_B; n_D = pcis->n - n_B;
208   n_R = pcis->n-n_vertices;
209 
210   /* Set types for local objects needed by BDDC precondtioner */
211   impMatType = MATSEQDENSE;
212 
213   /* Allocating some extra storage just to be safe */
214   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
215   for (i=0;i<pcis->n;i++) auxindices[i]=i;
216 
217   /* vertices in boundary numbering */
218   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
219   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
220   if (i != n_vertices) {
221     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
222   }
223 
224   /* Precompute stuffs needed for preprocessing and application of BDDC*/
225   if (n_constraints) {
226     /* see if we can save some allocations */
227     if (pcbddc->local_auxmat2) {
228       PetscInt on_R,on_constraints;
229       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
230       if (on_R != n_R || on_constraints != n_constraints) {
231         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
232         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
233       }
234     }
235     /* work vectors */
236     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
238     /* auxiliary matrices */
239     if (!pcbddc->local_auxmat2) {
240       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
241       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
242       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
243       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
244     }
245 
246     /* Extract constraints on R nodes: C_{CR}  */
247     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
248     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
249     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
250 
251     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
252     for (i=0;i<n_constraints;i++) {
253       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
254       /* Get row of constraint matrix in R numbering */
255       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
256       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
257       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
259       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
260       /* Solve for row of constraint matrix in R numbering */
261       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
262       /* Set values in local_auxmat2 */
263       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
264       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
265       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266     }
267     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
268     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
270 
271     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
272     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
273     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
274     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
275     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
276     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
277     ierr = MatSetUp(M1);CHKERRQ(ierr);
278     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
279     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
280     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
281     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
282     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
283     ierr = MatDestroy(&M2);CHKERRQ(ierr);
284     ierr = MatDestroy(&M3);CHKERRQ(ierr);
285     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
286     if (!pcbddc->local_auxmat1) {
287       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
288     } else {
289       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     }
291   }
292 
293   /* Get submatrices from subdomain matrix */
294   if (n_vertices) {
295     PetscInt ibs,mbs;
296     PetscBool issbaij;
297     Mat newmat;
298 
299     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
300     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
301     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
302     if (ibs != mbs) { /* need to convert to SEQAIJ */
303       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
304       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
307       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
308     } else {
309       /* this is safe */
310       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
311       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
312       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
313         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
314         /* which of the two approaches is faster? */
315         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
316         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
317         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
319         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
320       } else {
321         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
322         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323       }
324     }
325     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
326     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
327     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
328   }
329 
330   /* Matrix of coarse basis functions (local) */
331   if (pcbddc->coarse_phi_B) {
332     PetscInt on_B,on_primal;
333     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
334     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
335       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
336       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
337     }
338   }
339   if (pcbddc->coarse_phi_D) {
340     PetscInt on_D,on_primal;
341     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
342     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
343       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
344       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
345     }
346   }
347   if (!pcbddc->coarse_phi_B) {
348     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
349     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
350     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
351     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
352   }
353   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
354     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
355     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
356     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
357     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
358   }
359 
360   if (pcbddc->dbg_flag) {
361     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
362     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
363     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
364   }
365   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
366   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
367 
368   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
369 
370   /* vertices */
371   for (i=0;i<n_vertices;i++) {
372     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
373     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
374     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
375     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
376     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
377     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
378     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
379     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
382     if (n_constraints) {
383       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
384       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
385       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
386     }
387     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
388     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
389 
390     /* Set values in coarse basis function and subdomain part of coarse_mat */
391     /* coarse basis functions */
392     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
393     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
394     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
396     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
397     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
399     if (pcbddc->switch_static || pcbddc->dbg_flag) {
400       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
403       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405     }
406     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
407     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
408     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
409     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     if (n_constraints) {
411       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
412       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
413       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
414     }
415 
416     /* check */
417     if (pcbddc->dbg_flag) {
418       /* assemble subdomain vector on local nodes */
419       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
420       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
421       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
425       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
426       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
427       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
428       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
429       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
430       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       if (n_constraints) {
432         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
433         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
434         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
435       }
436       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
439       /* check saddle point solution */
440       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
443       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
444       /* shift by the identity matrix */
445       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
449     }
450   }
451 
452   /* constraints */
453   for (i=0;i<n_constraints;i++) {
454     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
455     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
456     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
457     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
458     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
459     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
460     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
461     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
462     if (n_vertices) {
463       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
464     }
465     /* Set values in coarse basis function and subdomain part of coarse_mat */
466     /* coarse basis functions */
467     j = i+n_vertices; /* don't touch this! */
468     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
469     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
472     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
473     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     if (pcbddc->switch_static || pcbddc->dbg_flag) {
475       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
478       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
479       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480     }
481     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
482     if (n_vertices) {
483       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
484       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
485       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
486     }
487     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
488     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
489     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
490 
491     if (pcbddc->dbg_flag) {
492       /* assemble subdomain vector on nodes */
493       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
494       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
495       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
496       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
498       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
499       /* assemble subdomain vector of lagrange multipliers */
500       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
501       if (n_vertices) {
502         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
503         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
504         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
505       }
506       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
507       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
508       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
510       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
512       /* check saddle point solution */
513       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
514       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
516       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
517       /* shift by the identity matrix */
518       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
519       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
522     }
523   }
524   /* call assembling routines for local coarse basis */
525   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   if (pcbddc->switch_static || pcbddc->dbg_flag) {
528     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   }
531 
532   /* compute other basis functions for non-symmetric problems */
533   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
534   if (!setsym || (setsym && !issym)) {
535     if (!pcbddc->coarse_psi_B) {
536       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
537       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
538       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
539       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
540     }
541     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
542       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
543       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
544       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
545       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
546     }
547     for (i=0;i<pcbddc->local_primal_size;i++) {
548       if (n_constraints) {
549         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
550         for (j=0;j<n_constraints;j++) {
551           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
552         }
553         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
554         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
555       }
556       if (i<n_vertices) {
557         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
558         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
559         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
560         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
561         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
562         if (n_constraints) {
563           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
564         }
565       } else {
566         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
567       }
568       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
569       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
570       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
573       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
574       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       if (i<n_vertices) {
576         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
577       }
578       if (pcbddc->switch_static || pcbddc->dbg_flag) {
579         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
580         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
582         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
583         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584       }
585 
586       if (pcbddc->dbg_flag) {
587         /* assemble subdomain vector on nodes */
588         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
589         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
590         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
591         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         if (i<n_vertices) {
593           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
594         }
595         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
596         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
597         /* assemble subdomain vector of lagrange multipliers */
598         for (j=0;j<pcbddc->local_primal_size;j++) {
599           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
600         }
601         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
602         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
603         /* check saddle point solution */
604         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
605         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
607         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
608         /* shift by the identity matrix */
609         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
610         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
611         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
613       }
614     }
615     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
616     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     if (pcbddc->switch_static || pcbddc->dbg_flag) {
618       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     }
621     unsymmetric_check = PETSC_TRUE;
622   } else { /* take references to already computed coarse basis */
623     unsymmetric_check = PETSC_FALSE;
624     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
625     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
626     if (pcbddc->coarse_phi_D) {
627       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
628       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
629     }
630   }
631   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
632   /* Checking coarse_sub_mat and coarse basis functios */
633   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
634   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
635   if (pcbddc->dbg_flag) {
636     Mat         coarse_sub_mat;
637     Mat         AUXMAT,TM1,TM2,TM3,TM4;
638     Mat         coarse_phi_D,coarse_phi_B;
639     Mat         coarse_psi_D,coarse_psi_B;
640     Mat         A_II,A_BB,A_IB,A_BI;
641     MatType     checkmattype=MATSEQAIJ;
642     PetscReal   real_value;
643 
644     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
645     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
646     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
647     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
648     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
649     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
650     if (unsymmetric_check) {
651       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
652       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
653     }
654     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
655 
656     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
657     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
658     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
659     if (unsymmetric_check) {
660       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
661       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
662       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
663       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
664       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
665       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
666       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
667       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
668       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
669       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
670       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
671       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
672     } else {
673       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
674       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
675       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
676       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
677       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
678       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
679       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
680       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
681     }
682     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
683     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
684     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
685     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
686     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
687     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
688     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
689     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
690     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
691     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
692     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
693     for (i=0;i<pcbddc->local_primal_size;i++) {
694       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
695     }
696     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
697     for (i=0;i<pcbddc->local_primal_size;i++) {
698       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
699     }
700     if (unsymmetric_check) {
701       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
702       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
703         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
704       }
705       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
706       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
707         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
708       }
709     }
710     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
711     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
712     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
713     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
714     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
715     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
716     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
717     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
718     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
719     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
720     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
721     if (unsymmetric_check) {
722       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
724     }
725     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
726     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
727     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
728     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
729   }
730   /* free memory */
731   if (n_vertices) {
732     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
733     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
734     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
735     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
737   }
738   if (n_constraints) {
739     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
740     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
741     ierr = MatDestroy(&M1);CHKERRQ(ierr);
742     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
743   }
744   ierr = PetscFree(auxindices);CHKERRQ(ierr);
745   /* get back data */
746   *coarse_submat_vals_n = coarse_submat_vals;
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
752 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
753 {
754   PC_IS*            pcis = (PC_IS*)(pc->data);
755   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
756   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
757   PetscBool         issbaij,isseqaij;
758   /* manage repeated solves */
759   MatReuse          reuse;
760   MatStructure      matstruct;
761   PetscErrorCode    ierr;
762 
763   PetscFunctionBegin;
764   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
765     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
766   }
767   /* get mat flags */
768   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
769   reuse = MAT_INITIAL_MATRIX;
770   if (pc->setupcalled) {
771     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
772     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
773     if (matstruct == SAME_NONZERO_PATTERN) {
774       reuse = MAT_REUSE_MATRIX;
775     } else {
776       reuse = MAT_INITIAL_MATRIX;
777     }
778   }
779   if (reuse == MAT_INITIAL_MATRIX) {
780     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
781     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
782     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
783     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
784     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
785   }
786 
787   /* transform local matrices if needed */
788   if (pcbddc->use_change_of_basis) {
789     Mat         change_mat_all;
790     PetscScalar *row_cmat_values;
791     PetscInt    *row_cmat_indices;
792     PetscInt    *nnz,*is_indices,*temp_indices;
793     PetscInt    i,j,k,n_D,n_B;
794 
795     /* Get Non-overlapping dimensions */
796     n_B = pcis->n_B;
797     n_D = pcis->n-n_B;
798 
799     /* compute nonzero structure of change of basis on all local nodes */
800     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
801     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
802     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
803     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
804     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
805     k=1;
806     for (i=0;i<n_B;i++) {
807       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
808       nnz[is_indices[i]]=j;
809       if (k < j) k = j;
810       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
811     }
812     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
813     /* assemble change of basis matrix on the whole set of local dofs */
814     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
815     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
816     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
817     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
818     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
819     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
820     for (i=0;i<n_D;i++) {
821       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
822     }
823     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
824     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
825     for (i=0;i<n_B;i++) {
826       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
827       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
828       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
829       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
830     }
831     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
832     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
833     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
834     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
835     if (isseqaij) {
836       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
837     } else {
838       Mat work_mat;
839       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
840       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
841       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
842     }
843     /*
844     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
845     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
846     */
847     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
848     ierr = PetscFree(nnz);CHKERRQ(ierr);
849     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
850   } else {
851     /* without change of basis, the local matrix is unchanged */
852     if (!pcbddc->local_mat) {
853       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
854       pcbddc->local_mat = matis->A;
855     }
856   }
857 
858   /* get submatrices */
859   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
860   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
861   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
862   if (!issbaij) {
863     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
864     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
865   } else {
866     Mat newmat;
867     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
868     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
869     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
870     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
877 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
878 {
879   PC_IS*         pcis = (PC_IS*)(pc->data);
880   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
881   IS             is_aux1,is_aux2;
882   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
883   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
884   PetscInt       vbs,bs;
885   PetscBT        bitmask;
886   PetscErrorCode ierr;
887 
888   PetscFunctionBegin;
889   /*
890     No need to setup local scatters if
891       - primal space is unchanged
892         AND
893       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
894         AND
895       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
896   */
897   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
898     PetscFunctionReturn(0);
899   }
900   /* destroy old objects */
901   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
902   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
903   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
904   /* Set Non-overlapping dimensions */
905   n_B = pcis->n_B; n_D = pcis->n - n_B;
906   n_vertices = pcbddc->n_actual_vertices;
907   /* create auxiliary bitmask */
908   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
909   for (i=0;i<n_vertices;i++) {
910     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
911   }
912 
913   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
914   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
915   for (i=0, n_R=0; i<pcis->n; i++) {
916     if (!PetscBTLookup(bitmask,i)) {
917       idx_R_local[n_R] = i;
918       n_R++;
919     }
920   }
921 
922   /* Block code */
923   vbs = 1;
924   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
925   if (bs>1 && !(n_vertices%bs)) {
926     PetscBool is_blocked = PETSC_TRUE;
927     PetscInt  *vary;
928     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
929     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
930     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
931     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
932     for (i=0; i<n_vertices; i++) {
933       if (vary[i]!=0 && vary[i]!=bs) {
934         is_blocked = PETSC_FALSE;
935         break;
936       }
937     }
938     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
939       vbs = bs;
940       for (i=0;i<n_R/vbs;i++) {
941         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
942       }
943     }
944     ierr = PetscFree(vary);CHKERRQ(ierr);
945   }
946   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
947   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
948 
949   /* print some info if requested */
950   if (pcbddc->dbg_flag) {
951     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
952     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
953     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
954     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
955     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
956     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
957     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
958     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
959   }
960 
961   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
962   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
963   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
964   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
965   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
966   for (i=0; i<n_D; i++) {
967     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
968   }
969   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
970   for (i=0, j=0; i<n_R; i++) {
971     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
972       aux_array1[j++] = i;
973     }
974   }
975   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
976   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
977   for (i=0, j=0; i<n_B; i++) {
978     if (!PetscBTLookup(bitmask,is_indices[i])) {
979       aux_array2[j++] = i;
980     }
981   }
982   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
983   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
984   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
985   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
986   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
987 
988   if (pcbddc->switch_static || pcbddc->dbg_flag) {
989     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
990     for (i=0, j=0; i<n_R; i++) {
991       if (PetscBTLookup(bitmask,idx_R_local[i])) {
992         aux_array1[j++] = i;
993       }
994     }
995     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
996     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
997     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
998   }
999   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1000   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1007 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1008 {
1009   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1010   PC_IS          *pcis = (PC_IS*)pc->data;
1011   PC             pc_temp;
1012   Mat            A_RR;
1013   MatStructure   matstruct;
1014   MatReuse       reuse;
1015   PetscScalar    m_one = -1.0;
1016   PetscReal      value;
1017   PetscInt       n_D,n_R,ibs,mbs;
1018   PetscBool      use_exact,use_exact_reduced,issbaij;
1019   PetscErrorCode ierr;
1020   /* prefixes stuff */
1021   char           dir_prefix[256],neu_prefix[256],str_level[16];
1022   size_t         len;
1023 
1024   PetscFunctionBegin;
1025   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1026 
1027   /* compute prefixes */
1028   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1029   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1030   if (!pcbddc->current_level) {
1031     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1032     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1033     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1034     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1035   } else {
1036     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1037     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1038     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1039     len -= 15; /* remove "pc_bddc_coarse_" */
1040     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1041     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1042     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1043     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1044     *(dir_prefix+len)='\0';
1045     *(neu_prefix+len)='\0';
1046     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1047     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1048     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1049     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1050   }
1051 
1052   /* DIRICHLET PROBLEM */
1053   /* Matrix for Dirichlet problem is pcis->A_II */
1054   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1055   if (!pcbddc->ksp_D) { /* create object if not yet build */
1056     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1057     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1058     /* default */
1059     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1060     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1061     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1062     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1063     if (issbaij) {
1064       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1065     } else {
1066       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1067     }
1068     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1069     /* Allow user's customization */
1070     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1071   }
1072   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1073   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1074   if (!n_D) {
1075     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1076     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1077   }
1078   /* Set Up KSP for Dirichlet problem of BDDC */
1079   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1080   /* set ksp_D into pcis data */
1081   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1082   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1083   pcis->ksp_D = pcbddc->ksp_D;
1084 
1085   /* NEUMANN PROBLEM */
1086   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1087   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1088   if (pcbddc->ksp_R) { /* already created ksp */
1089     PetscInt nn_R;
1090     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1091     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1092     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1093     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1094       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1095       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1096       reuse = MAT_INITIAL_MATRIX;
1097     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1098       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1099         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1100         reuse = MAT_INITIAL_MATRIX;
1101       } else { /* safe to reuse the matrix */
1102         reuse = MAT_REUSE_MATRIX;
1103       }
1104     }
1105     /* last check */
1106     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1107       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1108       reuse = MAT_INITIAL_MATRIX;
1109     }
1110   } else { /* first time, so we need to create the matrix */
1111     reuse = MAT_INITIAL_MATRIX;
1112   }
1113   /* extract A_RR */
1114   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1115   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1116   if (ibs != mbs) {
1117     Mat newmat;
1118     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1119     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1120     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1121   } else {
1122     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1123   }
1124   if (!pcbddc->ksp_R) { /* create object if not present */
1125     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1126     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1127     /* default */
1128     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1129     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1130     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1131     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1132     if (issbaij) {
1133       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1134     } else {
1135       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1136     }
1137     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1138     /* Allow user's customization */
1139     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1140   }
1141   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1142   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1143   if (!n_R) {
1144     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1145     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1146   }
1147   /* Set Up KSP for Neumann problem of BDDC */
1148   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1149 
1150   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1151   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1152     /* Dirichlet */
1153     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1154     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1155     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1156     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1157     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1158     /* need to be adapted? */
1159     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1160     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1161     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1162     /* print info */
1163     if (pcbddc->dbg_flag) {
1164       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1165       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1166       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1167       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1168       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1169       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1170     }
1171     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1172       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1173     }
1174 
1175     /* Neumann */
1176     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1177     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1178     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1179     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1180     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1181     /* need to be adapted? */
1182     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1183     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1184     /* print info */
1185     if (pcbddc->dbg_flag) {
1186       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1187       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1188     }
1189     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1190       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1191     }
1192   }
1193   /* free Neumann problem's matrix */
1194   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1200 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1201 {
1202   PetscErrorCode ierr;
1203   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1204 
1205   PetscFunctionBegin;
1206   if (applytranspose) {
1207     if (pcbddc->local_auxmat1) {
1208       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1209       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1210     }
1211     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1212   } else {
1213     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1214     if (pcbddc->local_auxmat1) {
1215       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1216       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1217     }
1218   }
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1223 #undef __FUNCT__
1224 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1225 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1226 {
1227   PetscErrorCode ierr;
1228   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1229   PC_IS*            pcis = (PC_IS*)  (pc->data);
1230   const PetscScalar zero = 0.0;
1231 
1232   PetscFunctionBegin;
1233   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1234   if (applytranspose) {
1235     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1236     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1237   } else {
1238     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1239     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1240   }
1241   /* start communications from local primal nodes to rhs of coarse solver */
1242   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1243   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1247   /* TODO remove null space when doing multilevel */
1248   if (pcbddc->coarse_ksp) {
1249     if (applytranspose) {
1250       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1251     } else {
1252       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1253     }
1254   }
1255   /* start communications from coarse solver solution to local primal nodes */
1256   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1257 
1258   /* Local solution on R nodes */
1259   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1260   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1261   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1262   if (pcbddc->switch_static) {
1263     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1264     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1265   }
1266   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1267   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1268   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270   if (pcbddc->switch_static) {
1271     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1272     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1273   }
1274 
1275   /* complete communications from coarse sol to local primal nodes */
1276   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1277 
1278   /* Sum contributions from two levels */
1279   if (applytranspose) {
1280     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1281     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1282   } else {
1283     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1284     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1285   }
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1290 #undef __FUNCT__
1291 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1292 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1293 {
1294   PetscErrorCode ierr;
1295   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1296   PetscScalar    *array,*array2;
1297   Vec            from,to;
1298 
1299   PetscFunctionBegin;
1300   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1301     from = pcbddc->coarse_vec;
1302     to = pcbddc->vec1_P;
1303     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1304       Vec tvec;
1305       PetscInt lsize;
1306       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1307       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1308       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1309       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1310       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1311       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1312       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1313     }
1314   } else { /* from local to global -> put data in coarse right hand side */
1315     from = pcbddc->vec1_P;
1316     to = pcbddc->coarse_vec;
1317   }
1318   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1324 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1325 {
1326   PetscErrorCode ierr;
1327   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1328   PetscScalar    *array,*array2;
1329   Vec            from,to;
1330 
1331   PetscFunctionBegin;
1332   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1333     from = pcbddc->coarse_vec;
1334     to = pcbddc->vec1_P;
1335   } else { /* from local to global -> put data in coarse right hand side */
1336     from = pcbddc->vec1_P;
1337     to = pcbddc->coarse_vec;
1338   }
1339   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1340   if (smode == SCATTER_FORWARD) {
1341     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1342       Vec tvec;
1343       PetscInt lsize;
1344       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1345       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1346       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1347       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1348       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1349       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1350       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1351     }
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 /* uncomment for testing purposes */
1357 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1358 #undef __FUNCT__
1359 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1360 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1361 {
1362   PetscErrorCode    ierr;
1363   PC_IS*            pcis = (PC_IS*)(pc->data);
1364   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1365   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1366   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1367   MatType           impMatType=MATSEQAIJ;
1368   /* one and zero */
1369   PetscScalar       one=1.0,zero=0.0;
1370   /* space to store constraints and their local indices */
1371   PetscScalar       *temp_quadrature_constraint;
1372   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1373   /* iterators */
1374   PetscInt          i,j,k,total_counts,temp_start_ptr;
1375   /* stuff to store connected components stored in pcbddc->mat_graph */
1376   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1377   PetscInt          n_ISForFaces,n_ISForEdges;
1378   /* near null space stuff */
1379   MatNullSpace      nearnullsp;
1380   const Vec         *nearnullvecs;
1381   Vec               *localnearnullsp;
1382   PetscBool         nnsp_has_cnst;
1383   PetscInt          nnsp_size;
1384   PetscScalar       *array;
1385   /* BLAS integers */
1386   PetscBLASInt      lwork,lierr;
1387   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1388   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1389   /* LAPACK working arrays for SVD or POD */
1390   PetscBool         skip_lapack;
1391   PetscScalar       *work;
1392   PetscReal         *singular_vals;
1393 #if defined(PETSC_USE_COMPLEX)
1394   PetscReal         *rwork;
1395 #endif
1396 #if defined(PETSC_MISSING_LAPACK_GESVD)
1397   PetscBLASInt      Blas_one_2=1;
1398   PetscScalar       *temp_basis,*correlation_mat;
1399 #else
1400   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1401   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1402 #endif
1403   /* reuse */
1404   PetscInt          olocal_primal_size;
1405   PetscInt          *oprimal_indices_local_idxs;
1406   /* change of basis */
1407   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1408   PetscBool         boolforchange,qr_needed;
1409   PetscBT           touched,change_basis,qr_needed_idx;
1410   /* auxiliary stuff */
1411   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1412   /* some quantities */
1413   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1414   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1415 
1416 
1417   PetscFunctionBegin;
1418   /* Destroy Mat objects computed previously */
1419   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1420   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1421   /* Get index sets for faces, edges and vertices from graph */
1422   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1423     pcbddc->use_vertices = PETSC_TRUE;
1424   }
1425   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1426   /* HACK: provide functions to set change of basis */
1427   if (!ISForVertices && pcbddc->NullSpace) {
1428     pcbddc->use_change_of_basis = PETSC_TRUE;
1429     pcbddc->use_change_on_faces = PETSC_FALSE;
1430   }
1431   /* print some info */
1432   if (pcbddc->dbg_flag) {
1433     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1434     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1435     i = 0;
1436     if (ISForVertices) {
1437       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1438     }
1439     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1440     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1441     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1442     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1443   }
1444   /* check if near null space is attached to global mat */
1445   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1446   if (nearnullsp) {
1447     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1448     /* remove any stored info */
1449     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1450     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1451     /* store information for BDDC solver reuse */
1452     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1453     pcbddc->onearnullspace = nearnullsp;
1454     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1455     for (i=0;i<nnsp_size;i++) {
1456       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1457     }
1458   } else { /* if near null space is not provided BDDC uses constants by default */
1459     nnsp_size = 0;
1460     nnsp_has_cnst = PETSC_TRUE;
1461   }
1462   /* get max number of constraints on a single cc */
1463   max_constraints = nnsp_size;
1464   if (nnsp_has_cnst) max_constraints++;
1465 
1466   /*
1467        Evaluate maximum storage size needed by the procedure
1468        - temp_indices will contain start index of each constraint stored as follows
1469        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1470        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1471        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1472                                                                                                                                                          */
1473   total_counts = n_ISForFaces+n_ISForEdges;
1474   total_counts *= max_constraints;
1475   n_vertices = 0;
1476   if (ISForVertices) {
1477     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1478   }
1479   total_counts += n_vertices;
1480   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1481   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1482   total_counts = 0;
1483   max_size_of_constraint = 0;
1484   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1485     if (i<n_ISForEdges) {
1486       used_IS = &ISForEdges[i];
1487     } else {
1488       used_IS = &ISForFaces[i-n_ISForEdges];
1489     }
1490     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1491     total_counts += j;
1492     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1493   }
1494   total_counts *= max_constraints;
1495   total_counts += n_vertices;
1496   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1497   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1498   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1499   /* get local part of global near null space vectors */
1500   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1501   for (k=0;k<nnsp_size;k++) {
1502     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1503     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505   }
1506 
1507   /* whether or not to skip lapack calls */
1508   skip_lapack = PETSC_TRUE;
1509   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1510 
1511   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1512   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1513     PetscScalar temp_work;
1514 #if defined(PETSC_MISSING_LAPACK_GESVD)
1515     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1516     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1517     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1518     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1519 #if defined(PETSC_USE_COMPLEX)
1520     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1521 #endif
1522     /* now we evaluate the optimal workspace using query with lwork=-1 */
1523     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1524     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1525     lwork = -1;
1526     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1527 #if !defined(PETSC_USE_COMPLEX)
1528     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1529 #else
1530     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1531 #endif
1532     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1533     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1534 #else /* on missing GESVD */
1535     /* SVD */
1536     PetscInt max_n,min_n;
1537     max_n = max_size_of_constraint;
1538     min_n = max_constraints;
1539     if (max_size_of_constraint < max_constraints) {
1540       min_n = max_size_of_constraint;
1541       max_n = max_constraints;
1542     }
1543     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1544 #if defined(PETSC_USE_COMPLEX)
1545     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1546 #endif
1547     /* now we evaluate the optimal workspace using query with lwork=-1 */
1548     lwork = -1;
1549     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1550     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1551     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1552     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1553 #if !defined(PETSC_USE_COMPLEX)
1554     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1555 #else
1556     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1557 #endif
1558     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1559     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1560 #endif /* on missing GESVD */
1561     /* Allocate optimal workspace */
1562     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1563     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1564   }
1565   /* Now we can loop on constraining sets */
1566   total_counts = 0;
1567   temp_indices[0] = 0;
1568   /* vertices */
1569   if (ISForVertices) {
1570     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1571     if (nnsp_has_cnst) { /* consider all vertices */
1572       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1573       for (i=0;i<n_vertices;i++) {
1574         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1575         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1576         total_counts++;
1577       }
1578     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1579       PetscBool used_vertex;
1580       for (i=0;i<n_vertices;i++) {
1581         used_vertex = PETSC_FALSE;
1582         k = 0;
1583         while (!used_vertex && k<nnsp_size) {
1584           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1585           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1586             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1587             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1588             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1589             total_counts++;
1590             used_vertex = PETSC_TRUE;
1591           }
1592           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1593           k++;
1594         }
1595       }
1596     }
1597     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1598     n_vertices = total_counts;
1599   }
1600 
1601   /* edges and faces */
1602   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1603     if (i<n_ISForEdges) {
1604       used_IS = &ISForEdges[i];
1605       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1606     } else {
1607       used_IS = &ISForFaces[i-n_ISForEdges];
1608       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1609     }
1610     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1611     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1612     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1613     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1614     /* change of basis should not be performed on local periodic nodes */
1615     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1616     if (nnsp_has_cnst) {
1617       PetscScalar quad_value;
1618       temp_constraints++;
1619       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1620       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1621       for (j=0;j<size_of_constraint;j++) {
1622         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1623       }
1624       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1625       total_counts++;
1626     }
1627     for (k=0;k<nnsp_size;k++) {
1628       PetscReal real_value;
1629       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1630       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1631       for (j=0;j<size_of_constraint;j++) {
1632         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1633       }
1634       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1635       /* check if array is null on the connected component */
1636       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1637       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1638       if (real_value > 0.0) { /* keep indices and values */
1639         temp_constraints++;
1640         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1641         total_counts++;
1642       }
1643     }
1644     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1645     valid_constraints = temp_constraints;
1646     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1647     if (!pcbddc->use_nnsp_true && temp_constraints) {
1648       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1649 
1650 #if defined(PETSC_MISSING_LAPACK_GESVD)
1651       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1652          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1653          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1654             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1655             from that computed using LAPACKgesvd
1656          -> This is due to a different computation of eigenvectors in LAPACKheev
1657          -> The quality of the POD-computed basis will be the same */
1658       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1659       /* Store upper triangular part of correlation matrix */
1660       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1661       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1662       for (j=0;j<temp_constraints;j++) {
1663         for (k=0;k<j+1;k++) {
1664           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1665         }
1666       }
1667       /* compute eigenvalues and eigenvectors of correlation matrix */
1668       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1669       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1670 #if !defined(PETSC_USE_COMPLEX)
1671       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1672 #else
1673       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1674 #endif
1675       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1676       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1677       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1678       j=0;
1679       while (j < temp_constraints && singular_vals[j] < tol) j++;
1680       total_counts=total_counts-j;
1681       valid_constraints = temp_constraints-j;
1682       /* scale and copy POD basis into used quadrature memory */
1683       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1684       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1685       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1686       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1687       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1688       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1689       if (j<temp_constraints) {
1690         PetscInt ii;
1691         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1692         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1693         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1694         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1695         for (k=0;k<temp_constraints-j;k++) {
1696           for (ii=0;ii<size_of_constraint;ii++) {
1697             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1698           }
1699         }
1700       }
1701 #else  /* on missing GESVD */
1702       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1703       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1704       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1705       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1706 #if !defined(PETSC_USE_COMPLEX)
1707       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1708 #else
1709       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1710 #endif
1711       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1712       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1713       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1714       k = temp_constraints;
1715       if (k > size_of_constraint) k = size_of_constraint;
1716       j = 0;
1717       while (j < k && singular_vals[k-j-1] < tol) j++;
1718       total_counts = total_counts-temp_constraints+k-j;
1719       valid_constraints = k-j;
1720 #endif /* on missing GESVD */
1721     }
1722     /* setting change_of_basis flag is safe now */
1723     if (boolforchange) {
1724       for (j=0;j<valid_constraints;j++) {
1725         PetscBTSet(change_basis,total_counts-j-1);
1726       }
1727     }
1728   }
1729   /* free index sets of faces, edges and vertices */
1730   for (i=0;i<n_ISForFaces;i++) {
1731     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1732   }
1733   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1734   for (i=0;i<n_ISForEdges;i++) {
1735     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1736   }
1737   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1738   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1739   /* map temp_indices_to_constraint in boundary numbering */
1740   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1741   if (i != temp_indices[total_counts]) {
1742     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1743   }
1744 
1745   /* free workspace */
1746   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1747     ierr = PetscFree(work);CHKERRQ(ierr);
1748 #if defined(PETSC_USE_COMPLEX)
1749     ierr = PetscFree(rwork);CHKERRQ(ierr);
1750 #endif
1751     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1752 #if defined(PETSC_MISSING_LAPACK_GESVD)
1753     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1754     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1755 #endif
1756   }
1757   for (k=0;k<nnsp_size;k++) {
1758     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1759   }
1760   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1761 
1762   /* set quantities in pcbddc data structure and store previous primal size */
1763   /* n_vertices defines the number of subdomain corners in the primal space */
1764   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1765   olocal_primal_size = pcbddc->local_primal_size;
1766   pcbddc->local_primal_size = total_counts;
1767   pcbddc->n_vertices = n_vertices;
1768   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1769 
1770   /* Create constraint matrix */
1771   /* The constraint matrix is used to compute the l2g map of primal dofs */
1772   /* so we need to set it up properly either with or without change of basis */
1773   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1774   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1775   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1776   /* array to compute a local numbering of constraints : vertices first then constraints */
1777   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1778   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1779   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1780   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1781   /* auxiliary stuff for basis change */
1782   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1783   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1784 
1785   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1786   total_primal_vertices=0;
1787   for (i=0;i<pcbddc->local_primal_size;i++) {
1788     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1789     if (size_of_constraint == 1) {
1790       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1791       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1792       aux_primal_minloc[total_primal_vertices]=0;
1793       total_primal_vertices++;
1794     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1795       PetscInt min_loc,min_index;
1796       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1797       /* find first untouched local node */
1798       k = 0;
1799       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1800       min_index = global_indices[k];
1801       min_loc = k;
1802       /* search the minimum among global nodes already untouched on the cc */
1803       for (k=1;k<size_of_constraint;k++) {
1804         /* there can be more than one constraint on a single connected component */
1805         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1806           min_index = global_indices[k];
1807           min_loc = k;
1808         }
1809       }
1810       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1811       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1812       aux_primal_minloc[total_primal_vertices]=min_loc;
1813       total_primal_vertices++;
1814     }
1815   }
1816   /* determine if a QR strategy is needed for change of basis */
1817   qr_needed = PETSC_FALSE;
1818   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1819   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1820     if (PetscBTLookup(change_basis,i)) {
1821       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1822       j = 0;
1823       for (k=0;k<size_of_constraint;k++) {
1824         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1825           j++;
1826         }
1827       }
1828       /* found more than one primal dof on the cc */
1829       if (j > 1) {
1830         PetscBTSet(qr_needed_idx,i);
1831         qr_needed = PETSC_TRUE;
1832       }
1833     }
1834   }
1835   /* free workspace */
1836   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1837 
1838   /* permute indices in order to have a sorted set of vertices */
1839   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1840 
1841   /* nonzero structure of constraint matrix */
1842   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1843   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1844   j=total_primal_vertices;
1845   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1846     if (!PetscBTLookup(change_basis,i)) {
1847       nnz[j]=temp_indices[i+1]-temp_indices[i];
1848       j++;
1849     }
1850   }
1851   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1852   ierr = PetscFree(nnz);CHKERRQ(ierr);
1853   /* set values in constraint matrix */
1854   for (i=0;i<total_primal_vertices;i++) {
1855     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1856   }
1857   total_counts = total_primal_vertices;
1858   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1859     if (!PetscBTLookup(change_basis,i)) {
1860       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1861       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1862       total_counts++;
1863     }
1864   }
1865   /* assembling */
1866   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1867   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1868   /*
1869   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1870   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1871   */
1872   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1873   if (pcbddc->use_change_of_basis) {
1874     /* dual and primal dofs on a single cc */
1875     PetscInt     dual_dofs,primal_dofs;
1876     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1877     PetscInt     primal_counter;
1878     /* working stuff for GEQRF */
1879     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1880     PetscBLASInt lqr_work;
1881     /* working stuff for UNGQR */
1882     PetscScalar  *gqr_work,lgqr_work_t;
1883     PetscBLASInt lgqr_work;
1884     /* working stuff for TRTRS */
1885     PetscScalar  *trs_rhs;
1886     PetscBLASInt Blas_NRHS;
1887     /* pointers for values insertion into change of basis matrix */
1888     PetscInt     *start_rows,*start_cols;
1889     PetscScalar  *start_vals;
1890     /* working stuff for values insertion */
1891     PetscBT      is_primal;
1892 
1893     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1894     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1895     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1896     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1897     /* work arrays */
1898     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1899     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1900     /* nonzeros per row */
1901     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1902       if (PetscBTLookup(change_basis,i)) {
1903         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1904         if (PetscBTLookup(qr_needed_idx,i)) {
1905           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1906         } else {
1907           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1908           /* get local primal index on the cc */
1909           j = 0;
1910           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1911           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1912         }
1913       }
1914     }
1915     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1916     ierr = PetscFree(nnz);CHKERRQ(ierr);
1917     /* Set initial identity in the matrix */
1918     for (i=0;i<pcis->n_B;i++) {
1919       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1920     }
1921 
1922     if (pcbddc->dbg_flag) {
1923       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1924       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1925     }
1926 
1927 
1928     /* Now we loop on the constraints which need a change of basis */
1929     /*
1930        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1931        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1932 
1933        Basic blocks of change of basis matrix T computed
1934 
1935           - Using the following block transformation if there is only a primal dof on the cc
1936             (in the example, primal dof is the last one of the edge in LOCAL ordering
1937              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1938             | 1        0   ...        0     1 |
1939             | 0        1   ...        0     1 |
1940             |              ...                |
1941             | 0        ...            1     1 |
1942             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1943 
1944           - via QR decomposition of constraints otherwise
1945     */
1946     if (qr_needed) {
1947       /* space to store Q */
1948       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1949       /* first we issue queries for optimal work */
1950       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1951       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1952       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1953       lqr_work = -1;
1954       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1955       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1956       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1957       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1958       lgqr_work = -1;
1959       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1960       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1961       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1962       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1963       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1964       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1965       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1966       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1967       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1968       /* array to store scaling factors for reflectors */
1969       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1970       /* array to store rhs and solution of triangular solver */
1971       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1972       /* allocating workspace for check */
1973       if (pcbddc->dbg_flag) {
1974         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1975       }
1976     }
1977     /* array to store whether a node is primal or not */
1978     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1979     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1980     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1981     if (i != total_primal_vertices) {
1982       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1983     }
1984     for (i=0;i<total_primal_vertices;i++) {
1985       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1986     }
1987     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1988 
1989     /* loop on constraints and see whether or not they need a change of basis and compute it */
1990     /* -> using implicit ordering contained in temp_indices data */
1991     total_counts = pcbddc->n_vertices;
1992     primal_counter = total_counts;
1993     while (total_counts<pcbddc->local_primal_size) {
1994       primal_dofs = 1;
1995       if (PetscBTLookup(change_basis,total_counts)) {
1996         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1997         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1998           primal_dofs++;
1999         }
2000         /* get constraint info */
2001         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2002         dual_dofs = size_of_constraint-primal_dofs;
2003 
2004         if (pcbddc->dbg_flag) {
2005           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2006         }
2007 
2008         if (primal_dofs > 1) { /* QR */
2009 
2010           /* copy quadrature constraints for change of basis check */
2011           if (pcbddc->dbg_flag) {
2012             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2013           }
2014           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2015           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2016 
2017           /* compute QR decomposition of constraints */
2018           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2019           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2020           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2021           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2022           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2023           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2024           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2025 
2026           /* explictly compute R^-T */
2027           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2028           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2029           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2030           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2031           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2032           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2033           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2034           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2035           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2036           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2037 
2038           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2039           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2040           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2041           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2042           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2043           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2044           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2045           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2046           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2047 
2048           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2049              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2050              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2051           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2052           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2053           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2054           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2055           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2056           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2057           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2058           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2059           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2060           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2061 
2062           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2063           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2064           /* insert cols for primal dofs */
2065           for (j=0;j<primal_dofs;j++) {
2066             start_vals = &qr_basis[j*size_of_constraint];
2067             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2068             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2069           }
2070           /* insert cols for dual dofs */
2071           for (j=0,k=0;j<dual_dofs;k++) {
2072             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2073               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2074               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2075               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2076               j++;
2077             }
2078           }
2079 
2080           /* check change of basis */
2081           if (pcbddc->dbg_flag) {
2082             PetscInt   ii,jj;
2083             PetscBool valid_qr=PETSC_TRUE;
2084             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2085             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2086             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2087             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2088             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2089             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2090             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2091             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2092             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2093             for (jj=0;jj<size_of_constraint;jj++) {
2094               for (ii=0;ii<primal_dofs;ii++) {
2095                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2096                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2097               }
2098             }
2099             if (!valid_qr) {
2100               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2101               for (jj=0;jj<size_of_constraint;jj++) {
2102                 for (ii=0;ii<primal_dofs;ii++) {
2103                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2104                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2105                   }
2106                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2107                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2108                   }
2109                 }
2110               }
2111             } else {
2112               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2113             }
2114           }
2115         } else { /* simple transformation block */
2116           PetscInt row,col;
2117           PetscScalar val;
2118           for (j=0;j<size_of_constraint;j++) {
2119             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2120             if (!PetscBTLookup(is_primal,row)) {
2121               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2122               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2123               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2124             } else {
2125               for (k=0;k<size_of_constraint;k++) {
2126                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2127                 if (row != col) {
2128                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2129                 } else {
2130                   val = 1.0;
2131                 }
2132                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2133               }
2134             }
2135           }
2136           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2137         }
2138         /* increment primal counter */
2139         primal_counter += primal_dofs;
2140       } else {
2141         if (pcbddc->dbg_flag) {
2142           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2143         }
2144       }
2145       /* increment constraint counter total_counts */
2146       total_counts += primal_dofs;
2147     }
2148 
2149     /* free workspace */
2150     if (qr_needed) {
2151       if (pcbddc->dbg_flag) {
2152         ierr = PetscFree(work);CHKERRQ(ierr);
2153       }
2154       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2155       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2156       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2157       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2158       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2159     }
2160     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2161     /* assembling */
2162     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2163     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2164     /*
2165     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2166     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2167     */
2168   }
2169 
2170   /* get indices in local ordering for vertices and constraints */
2171   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2172     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2173     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2174   }
2175   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2176   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2177   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2178   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2179   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2180   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2181   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2182   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2183   /* set quantities in PCBDDC data struct */
2184   pcbddc->n_actual_vertices = i;
2185   /* check if a new primal space has been introduced */
2186   pcbddc->new_primal_space_local = PETSC_TRUE;
2187   if (olocal_primal_size == pcbddc->local_primal_size) {
2188     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2189     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2190     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2191   }
2192   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2193   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2194 
2195   /* flush dbg viewer */
2196   if (pcbddc->dbg_flag) {
2197     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2198   }
2199 
2200   /* free workspace */
2201   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2202   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2203   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2204   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2205   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2206   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2207   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2208   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2214 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2215 {
2216   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2217   PC_IS       *pcis = (PC_IS*)pc->data;
2218   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2219   PetscInt    ierr,i,vertex_size;
2220   PetscViewer viewer=pcbddc->dbg_viewer;
2221 
2222   PetscFunctionBegin;
2223   /* Reset previously computed graph */
2224   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2225   /* Init local Graph struct */
2226   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2227 
2228   /* Check validity of the csr graph passed in by the user */
2229   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2230     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2231   }
2232 
2233   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2234   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2235     Mat mat_adj;
2236     const PetscInt *xadj,*adjncy;
2237     PetscBool flg_row=PETSC_TRUE;
2238 
2239     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2240     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2241     if (!flg_row) {
2242       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2243     }
2244     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2245     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2246     if (!flg_row) {
2247       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2248     }
2249     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2250   }
2251 
2252   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2253   vertex_size = 1;
2254   if (pcbddc->user_provided_isfordofs) {
2255     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2256       ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2257       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2258         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2259         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2260       }
2261       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2262       pcbddc->n_ISForDofs = 0;
2263       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2264     }
2265     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2266     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2267   } else {
2268     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2269       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2270       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2271       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2272         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2273       }
2274     }
2275   }
2276 
2277   /* Setup of Graph */
2278   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2279     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2280   }
2281   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2282     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2283   }
2284   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2285 
2286   /* Graph's connected components analysis */
2287   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2288 
2289   /* print some info to stdout */
2290   if (pcbddc->dbg_flag) {
2291     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2292   }
2293 
2294   /* mark topography has done */
2295   pcbddc->recompute_topography = PETSC_FALSE;
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2301 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2302 {
2303   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2304   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   n = 0;
2309   vertices = 0;
2310   if (pcbddc->ConstraintMatrix) {
2311     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2312     for (i=0;i<local_primal_size;i++) {
2313       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2314       if (size_of_constraint == 1) n++;
2315       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2316     }
2317     if (vertices_idx) {
2318       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2319       n = 0;
2320       for (i=0;i<local_primal_size;i++) {
2321         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2322         if (size_of_constraint == 1) {
2323           vertices[n++]=row_cmat_indices[0];
2324         }
2325         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2326       }
2327     }
2328   }
2329   *n_vertices = n;
2330   if (vertices_idx) *vertices_idx = vertices;
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2336 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2337 {
2338   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2339   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2340   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2341   PetscBT        touched;
2342   PetscErrorCode ierr;
2343 
2344     /* This function assumes that the number of local constraints per connected component
2345        is not greater than the number of nodes defined for the connected component
2346        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2347   PetscFunctionBegin;
2348   n = 0;
2349   constraints_index = 0;
2350   if (pcbddc->ConstraintMatrix) {
2351     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2352     max_size_of_constraint = 0;
2353     for (i=0;i<local_primal_size;i++) {
2354       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2355       if (size_of_constraint > 1) {
2356         n++;
2357       }
2358       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2359       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2360     }
2361     if (constraints_idx) {
2362       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2363       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2364       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2365       n = 0;
2366       for (i=0;i<local_primal_size;i++) {
2367         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2368         if (size_of_constraint > 1) {
2369           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2370           /* find first untouched local node */
2371           j = 0;
2372           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2373           min_index = row_cmat_global_indices[j];
2374           min_loc = j;
2375           /* search the minimum among nodes not yet touched on the connected component
2376              since there can be more than one constraint on a single cc */
2377           for (j=1;j<size_of_constraint;j++) {
2378             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2379               min_index = row_cmat_global_indices[j];
2380               min_loc = j;
2381             }
2382           }
2383           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2384           constraints_index[n++] = row_cmat_indices[min_loc];
2385         }
2386         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2387       }
2388       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2389       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2390     }
2391   }
2392   *n_constraints = n;
2393   if (constraints_idx) *constraints_idx = constraints_index;
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 /* the next two functions has been adapted from pcis.c */
2398 #undef __FUNCT__
2399 #define __FUNCT__ "PCBDDCApplySchur"
2400 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2401 {
2402   PetscErrorCode ierr;
2403   PC_IS          *pcis = (PC_IS*)(pc->data);
2404 
2405   PetscFunctionBegin;
2406   if (!vec2_B) { vec2_B = v; }
2407   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2408   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2409   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2410   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2411   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2417 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2418 {
2419   PetscErrorCode ierr;
2420   PC_IS          *pcis = (PC_IS*)(pc->data);
2421 
2422   PetscFunctionBegin;
2423   if (!vec2_B) { vec2_B = v; }
2424   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2425   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2426   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2427   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2428   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "PCBDDCSubsetNumbering"
2434 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2435 {
2436   Vec            local_vec,global_vec;
2437   IS             seqis,paris;
2438   VecScatter     scatter_ctx;
2439   PetscScalar    *array;
2440   PetscInt       *temp_global_dofs;
2441   PetscScalar    globalsum;
2442   PetscInt       i,j,s;
2443   PetscInt       nlocals,first_index,old_index,max_local;
2444   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2445   PetscMPIInt    *dof_sizes,*dof_displs;
2446   PetscBool      first_found;
2447   PetscErrorCode ierr;
2448 
2449   PetscFunctionBegin;
2450   /* mpi buffers */
2451   MPI_Comm_size(comm,&size_prec_comm);
2452   MPI_Comm_rank(comm,&rank_prec_comm);
2453   j = ( !rank_prec_comm ? size_prec_comm : 0);
2454   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2455   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2456   /* get maximum size of subset */
2457   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2458   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2459   max_local = 0;
2460   if (n_local_dofs) {
2461     max_local = temp_global_dofs[0];
2462     for (i=1;i<n_local_dofs;i++) {
2463       if (max_local < temp_global_dofs[i] ) {
2464         max_local = temp_global_dofs[i];
2465       }
2466     }
2467   }
2468   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2469   max_global++;
2470   max_local = 0;
2471   if (n_local_dofs) {
2472     max_local = local_dofs[0];
2473     for (i=1;i<n_local_dofs;i++) {
2474       if (max_local < local_dofs[i] ) {
2475         max_local = local_dofs[i];
2476       }
2477     }
2478   }
2479   max_local++;
2480   /* allocate workspace */
2481   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2482   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2483   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2484   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2485   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2486   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2487   /* create scatter */
2488   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2489   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2490   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2491   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2492   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2493   /* init array */
2494   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2495   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2496   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2497   if (local_dofs_mult) {
2498     for (i=0;i<n_local_dofs;i++) {
2499       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2500     }
2501   } else {
2502     for (i=0;i<n_local_dofs;i++) {
2503       array[local_dofs[i]]=1.0;
2504     }
2505   }
2506   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2507   /* scatter into global vec and get total number of global dofs */
2508   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2509   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2510   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2511   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2512   /* Fill global_vec with cumulative function for global numbering */
2513   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2514   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2515   nlocals = 0;
2516   first_index = -1;
2517   first_found = PETSC_FALSE;
2518   for (i=0;i<s;i++) {
2519     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2520       first_found = PETSC_TRUE;
2521       first_index = i;
2522     }
2523     nlocals += (PetscInt)PetscRealPart(array[i]);
2524   }
2525   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2526   if (!rank_prec_comm) {
2527     dof_displs[0]=0;
2528     for (i=1;i<size_prec_comm;i++) {
2529       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2530     }
2531   }
2532   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2533   if (first_found) {
2534     array[first_index] += (PetscScalar)nlocals;
2535     old_index = first_index;
2536     for (i=first_index+1;i<s;i++) {
2537       if (PetscRealPart(array[i]) > 0.0) {
2538         array[i] += array[old_index];
2539         old_index = i;
2540       }
2541     }
2542   }
2543   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2544   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2545   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2546   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2547   /* get global ordering of local dofs */
2548   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2549   if (local_dofs_mult) {
2550     for (i=0;i<n_local_dofs;i++) {
2551       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2552     }
2553   } else {
2554     for (i=0;i<n_local_dofs;i++) {
2555       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2556     }
2557   }
2558   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2559   /* free workspace */
2560   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2561   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2562   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2563   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2564   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2565   /* return pointer to global ordering of local dofs */
2566   *global_numbering_subset = temp_global_dofs;
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 #undef __FUNCT__
2571 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2572 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2573 {
2574   PetscInt       i,j;
2575   PetscScalar    *alphas;
2576   PetscErrorCode ierr;
2577 
2578   PetscFunctionBegin;
2579   /* this implements stabilized Gram-Schmidt */
2580   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2581   for (i=0;i<n;i++) {
2582     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2583     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2584     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2585   }
2586   ierr = PetscFree(alphas);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 /* TODO
2591    - now preallocation is done assuming SEQDENSE local matrices
2592 */
2593 #undef __FUNCT__
2594 #define __FUNCT__ "MatISGetMPIXAIJ"
2595 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2596 {
2597   Mat                    new_mat;
2598   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2599   /* info on mat */
2600   /* ISLocalToGlobalMapping rmapping,cmapping; */
2601   PetscInt               bs,rows,cols;
2602   PetscInt               lrows,lcols;
2603   PetscInt               local_rows,local_cols;
2604   PetscBool              isdense;
2605   /* values insertion */
2606   PetscScalar            *array;
2607   PetscInt               *local_indices,*global_indices;
2608   /* work */
2609   PetscInt               i,j,index_row;
2610   PetscErrorCode         ierr;
2611 
2612   PetscFunctionBegin;
2613   /* MISSING CHECKS
2614     - rectangular case not covered (it is not allowed by MATIS)
2615   */
2616   /* get info from mat */
2617   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2618   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2619   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2620   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2621 
2622   /* work */
2623   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2624   for (i=0;i<local_rows;i++) local_indices[i]=i;
2625   /* map indices of local mat to global values */
2626   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2627   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2628   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2629 
2630   if (reuse==MAT_INITIAL_MATRIX) {
2631     Vec         vec_dnz,vec_onz;
2632     PetscScalar *my_dnz,*my_onz;
2633     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2634     PetscInt    index_col,owner;
2635     PetscMPIInt nsubdomains;
2636 
2637     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2638     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2639     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2640     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2641     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2642     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2643     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2644 
2645     /*
2646       preallocation
2647     */
2648 
2649     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2650     /*
2651        Some vectors are needed to sum up properly on shared interface dofs.
2652        Preallocation macros cannot do the job.
2653        Note that preallocation is not exact, since it overestimates nonzeros
2654     */
2655     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2656     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2657     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2658     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2659     /* All processes need to compute entire row ownership */
2660     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2661     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2662     for (i=0;i<nsubdomains;i++) {
2663       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2664         row_ownership[j]=i;
2665       }
2666     }
2667 
2668     /*
2669        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2670        then, they will be summed up properly. This way, preallocation is always sufficient
2671     */
2672     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2673     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2674     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2675     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2676     for (i=0;i<local_rows;i++) {
2677       index_row = global_indices[i];
2678       for (j=i;j<local_rows;j++) {
2679         owner = row_ownership[index_row];
2680         index_col = global_indices[j];
2681         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2682           my_dnz[i] += 1.0;
2683         } else { /* offdiag block */
2684           my_onz[i] += 1.0;
2685         }
2686         /* same as before, interchanging rows and cols */
2687         if (i != j) {
2688           owner = row_ownership[index_col];
2689           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2690             my_dnz[j] += 1.0;
2691           } else {
2692             my_onz[j] += 1.0;
2693           }
2694         }
2695       }
2696     }
2697     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2698     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2699     if (local_rows) { /* multilevel guard */
2700       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2701       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2702     }
2703     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2704     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2705     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2706     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2707     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2708     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2709     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2710 
2711     /* set computed preallocation in dnz and onz */
2712     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2713     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2714     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2715     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2716     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2717     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2718     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2719     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2720 
2721     /* Resize preallocation if overestimated */
2722     for (i=0;i<lrows;i++) {
2723       dnz[i] = PetscMin(dnz[i],lcols);
2724       onz[i] = PetscMin(onz[i],cols-lcols);
2725     }
2726     /* set preallocation */
2727     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2728     for (i=0;i<lrows/bs;i++) {
2729       dnz[i] = dnz[i*bs]/bs;
2730       onz[i] = onz[i*bs]/bs;
2731     }
2732     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2733     for (i=0;i<lrows/bs;i++) {
2734       dnz[i] = dnz[i]-i;
2735     }
2736     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2737     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2738     *M = new_mat;
2739   } else {
2740     PetscInt mbs,mrows,mcols;
2741     /* some checks */
2742     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2743     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2744     if (mrows != rows) {
2745       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2746     }
2747     if (mrows != rows) {
2748       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2749     }
2750     if (mbs != bs) {
2751       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2752     }
2753     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2754   }
2755   /* set local to global mappings */
2756   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2757   /* Set values */
2758   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2759   if (isdense) { /* special case for dense local matrices */
2760     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2761     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2762     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2763     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2764     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2765     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2766   } else { /* very basic values insertion for all other matrix types */
2767     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2768     for (i=0;i<local_rows;i++) {
2769       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2770       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2771       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2772       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2773       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2774       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2775     }
2776     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2777   }
2778   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2779   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2780   if (isdense) {
2781     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2782   }
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatISGetSubassemblingPattern"
2788 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2789 {
2790   Mat             subdomain_adj;
2791   IS              new_ranks,ranks_send_to;
2792   MatPartitioning partitioner;
2793   Mat_IS          *matis;
2794   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2795   PetscInt        prank;
2796   PetscMPIInt     size,rank,color;
2797   PetscInt        *xadj,*adjncy,*oldranks;
2798   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2799   PetscInt        i,j,local_size,threshold=0;
2800   PetscErrorCode  ierr;
2801   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2802   PetscSubcomm    subcomm;
2803 
2804   PetscFunctionBegin;
2805   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2806   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2808 
2809   /* Get info on mapping */
2810   matis = (Mat_IS*)(mat->data);
2811   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2812   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2813 
2814   /* build local CSR graph of subdomains' connectivity */
2815   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2816   xadj[0] = 0;
2817   xadj[1] = PetscMax(n_neighs-1,0);
2818   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2819   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2820 
2821   if (threshold) {
2822     PetscInt* count,min_threshold;
2823     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2824     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2825     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2826       for (j=0;j<n_shared[i];j++) {
2827         count[shared[i][j]] += 1;
2828       }
2829     }
2830     /* adapt threshold since we dont want to lose significant connections */
2831     min_threshold = n_neighs;
2832     for (i=1;i<n_neighs;i++) {
2833       for (j=0;j<n_shared[i];j++) {
2834         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2835       }
2836     }
2837     threshold = PetscMax(min_threshold+1,threshold);
2838     xadj[1] = 0;
2839     for (i=1;i<n_neighs;i++) {
2840       for (j=0;j<n_shared[i];j++) {
2841         if (count[shared[i][j]] < threshold) {
2842           adjncy[xadj[1]] = neighs[i];
2843           adjncy_wgt[xadj[1]] = n_shared[i];
2844           xadj[1]++;
2845           break;
2846         }
2847       }
2848     }
2849     ierr = PetscFree(count);CHKERRQ(ierr);
2850   } else {
2851     if (xadj[1]) {
2852       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2853       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2854     }
2855   }
2856   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2857   if (use_square) {
2858     for (i=0;i<xadj[1];i++) {
2859       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2860     }
2861   }
2862   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2863 
2864   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2865 
2866   /*
2867     Restrict work on active processes only.
2868   */
2869   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2870   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2871   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2872   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2873   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2874   if (color) {
2875     ierr = PetscFree(xadj);CHKERRQ(ierr);
2876     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2877     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2878   } else {
2879     PetscInt coarsening_ratio;
2880     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2881     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2882     prank = rank;
2883     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2884     /*
2885     for (i=0;i<size;i++) {
2886       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2887     }
2888     */
2889     for (i=0;i<xadj[1];i++) {
2890       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2891     }
2892     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2893     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2894     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2895 
2896     /* Partition */
2897     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2898     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2899     if (use_vwgt) {
2900       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2901       v_wgt[0] = local_size;
2902       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2903     }
2904     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2905     coarsening_ratio = size/n_subdomains;
2906     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2907     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2908     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2909     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2910     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2911 
2912     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2913     if (contiguous) {
2914       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2915     } else {
2916       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2917     }
2918     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2919     /* clean up */
2920     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2921     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2922     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2923     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2924   }
2925   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2926 
2927   /* assemble parallel IS for sends */
2928   i = 1;
2929   if (color) i=0;
2930   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2931 
2932   /* get back IS */
2933   *is_sends = ranks_send_to;
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2938 
2939 #undef __FUNCT__
2940 #define __FUNCT__ "MatISSubassemble"
2941 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2942 {
2943   Mat                    local_mat;
2944   Mat_IS                 *matis;
2945   IS                     is_sends_internal;
2946   PetscInt               rows,cols;
2947   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2948   PetscBool              ismatis,isdense,destroy_mat;
2949   ISLocalToGlobalMapping l2gmap;
2950   PetscInt*              l2gmap_indices;
2951   const PetscInt*        is_indices;
2952   MatType                new_local_type;
2953   /* buffers */
2954   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2955   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2956   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2957   /* MPI */
2958   MPI_Comm               comm,comm_n;
2959   PetscSubcomm           subcomm;
2960   PetscMPIInt            n_sends,n_recvs,commsize;
2961   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2962   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2963   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2964   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2965   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2966   PetscErrorCode         ierr;
2967 
2968   PetscFunctionBegin;
2969   /* TODO: add missing checks */
2970   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2971   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2972   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2973   PetscValidLogicalCollectiveInt(mat,nis,7);
2974   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2975   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2976   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2977   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2978   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2979   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2980   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2981   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2982     PetscInt mrows,mcols,mnrows,mncols;
2983     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2984     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2985     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2986     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2987     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2988     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2989   }
2990   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2991   PetscValidLogicalCollectiveInt(mat,bs,0);
2992   /* prepare IS for sending if not provided */
2993   if (!is_sends) {
2994     PetscBool pcontig = PETSC_TRUE;
2995     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
2996     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
2997   } else {
2998     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2999     is_sends_internal = is_sends;
3000   }
3001 
3002   /* get pointer of MATIS data */
3003   matis = (Mat_IS*)mat->data;
3004 
3005   /* get comm */
3006   comm = PetscObjectComm((PetscObject)mat);
3007 
3008   /* compute number of sends */
3009   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3010   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3011 
3012   /* compute number of receives */
3013   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3014   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
3015   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3016   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3017   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3018   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3019   ierr = PetscFree(iflags);CHKERRQ(ierr);
3020 
3021   /* restrict comm if requested */
3022   subcomm = 0;
3023   destroy_mat = PETSC_FALSE;
3024   if (restrict_comm) {
3025     PetscMPIInt color,rank,subcommsize;
3026     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3027     color = 0;
3028     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3029     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3030     subcommsize = commsize - subcommsize;
3031     /* check if reuse has been requested */
3032     if (reuse == MAT_REUSE_MATRIX) {
3033       if (*mat_n) {
3034         PetscMPIInt subcommsize2;
3035         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3036         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3037         comm_n = PetscObjectComm((PetscObject)*mat_n);
3038       } else {
3039         comm_n = PETSC_COMM_SELF;
3040       }
3041     } else { /* MAT_INITIAL_MATRIX */
3042       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3043       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3044       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3045       comm_n = subcomm->comm;
3046     }
3047     /* flag to destroy *mat_n if not significative */
3048     if (color) destroy_mat = PETSC_TRUE;
3049   } else {
3050     comm_n = comm;
3051   }
3052 
3053   /* prepare send/receive buffers */
3054   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
3055   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3056   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
3057   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3058   if (nis) {
3059     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3060     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3061   }
3062 
3063   /* Get data from local matrices */
3064   if (!isdense) {
3065     /* TODO: See below some guidelines on how to prepare the local buffers */
3066     /*
3067        send_buffer_vals should contain the raw values of the local matrix
3068        send_buffer_idxs should contain:
3069        - MatType_PRIVATE type
3070        - PetscInt        size_of_l2gmap
3071        - PetscInt        global_row_indices[size_of_l2gmap]
3072        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3073     */
3074   } else {
3075     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3076     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3077     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
3078     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3079     send_buffer_idxs[1] = i;
3080     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3081     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3082     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3083     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3084     for (i=0;i<n_sends;i++) {
3085       ilengths_vals[is_indices[i]] = len*len;
3086       ilengths_idxs[is_indices[i]] = len+2;
3087     }
3088   }
3089   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3090   /* additional is (if any) */
3091   if (nis) {
3092     PetscMPIInt psum;
3093     PetscInt j;
3094     for (j=0,psum=0;j<nis;j++) {
3095       PetscInt plen;
3096       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3097       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3098       psum += len+1; /* indices + lenght */
3099     }
3100     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3101     for (j=0,psum=0;j<nis;j++) {
3102       PetscInt plen;
3103       const PetscInt *is_array_idxs;
3104       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3105       send_buffer_idxs_is[psum] = plen;
3106       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3107       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3108       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3109       psum += plen+1; /* indices + lenght */
3110     }
3111     for (i=0;i<n_sends;i++) {
3112       ilengths_idxs_is[is_indices[i]] = psum;
3113     }
3114     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3115   }
3116 
3117   buf_size_idxs = 0;
3118   buf_size_vals = 0;
3119   buf_size_idxs_is = 0;
3120   for (i=0;i<n_recvs;i++) {
3121     buf_size_idxs += (PetscInt)olengths_idxs[i];
3122     buf_size_vals += (PetscInt)olengths_vals[i];
3123     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3124   }
3125   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
3126   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
3127   ierr = PetscMalloc(buf_size_idxs_is*sizeof(PetscInt),&recv_buffer_idxs_is);CHKERRQ(ierr);
3128 
3129   /* get new tags for clean communications */
3130   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3131   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3132   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3133 
3134   /* allocate for requests */
3135   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
3136   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
3137   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs_is);CHKERRQ(ierr);
3138   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
3139   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
3140   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs_is);CHKERRQ(ierr);
3141 
3142   /* communications */
3143   ptr_idxs = recv_buffer_idxs;
3144   ptr_vals = recv_buffer_vals;
3145   ptr_idxs_is = recv_buffer_idxs_is;
3146   for (i=0;i<n_recvs;i++) {
3147     source_dest = onodes[i];
3148     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3149     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3150     ptr_idxs += olengths_idxs[i];
3151     ptr_vals += olengths_vals[i];
3152     if (nis) {
3153       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3154       ptr_idxs_is += olengths_idxs_is[i];
3155     }
3156   }
3157   for (i=0;i<n_sends;i++) {
3158     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3159     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3160     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3161     if (nis) {
3162       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3163     }
3164   }
3165   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3166   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3167 
3168   /* assemble new l2g map */
3169   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3170   ptr_idxs = recv_buffer_idxs;
3171   buf_size_idxs = 0;
3172   for (i=0;i<n_recvs;i++) {
3173     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3174     ptr_idxs += olengths_idxs[i];
3175   }
3176   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
3177   ptr_idxs = recv_buffer_idxs;
3178   buf_size_idxs = 0;
3179   for (i=0;i<n_recvs;i++) {
3180     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3181     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3182     ptr_idxs += olengths_idxs[i];
3183   }
3184   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3185   ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3186   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3187 
3188   /* infer new local matrix type from received local matrices type */
3189   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3190   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3191   if (n_recvs) {
3192     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3193     ptr_idxs = recv_buffer_idxs;
3194     for (i=0;i<n_recvs;i++) {
3195       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3196         new_local_type_private = MATAIJ_PRIVATE;
3197         break;
3198       }
3199       ptr_idxs += olengths_idxs[i];
3200     }
3201     switch (new_local_type_private) {
3202       case MATDENSE_PRIVATE:
3203         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3204           new_local_type = MATSEQAIJ;
3205           bs = 1;
3206         } else { /* if I receive only 1 dense matrix */
3207           new_local_type = MATSEQDENSE;
3208           bs = 1;
3209         }
3210         break;
3211       case MATAIJ_PRIVATE:
3212         new_local_type = MATSEQAIJ;
3213         bs = 1;
3214         break;
3215       case MATBAIJ_PRIVATE:
3216         new_local_type = MATSEQBAIJ;
3217         break;
3218       case MATSBAIJ_PRIVATE:
3219         new_local_type = MATSEQSBAIJ;
3220         break;
3221       default:
3222         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3223         break;
3224     }
3225   } else { /* by default, new_local_type is seqdense */
3226     new_local_type = MATSEQDENSE;
3227     bs = 1;
3228   }
3229 
3230   /* create MATIS object if needed */
3231   if (reuse == MAT_INITIAL_MATRIX) {
3232     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3233     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3234   } else {
3235     /* it also destroys the local matrices */
3236     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3237   }
3238   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3239   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3240   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3241   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3242 
3243   /* set values */
3244   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3245   ptr_vals = recv_buffer_vals;
3246   ptr_idxs = recv_buffer_idxs;
3247   for (i=0;i<n_recvs;i++) {
3248     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3249       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3250       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3251       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3252       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3253       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3254     } else {
3255       /* TODO */
3256     }
3257     ptr_idxs += olengths_idxs[i];
3258     ptr_vals += olengths_vals[i];
3259   }
3260   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3261   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3262   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264 
3265 #if 1
3266   if (!restrict_comm) { /* check */
3267     Vec       lvec,rvec;
3268     PetscReal infty_error;
3269 
3270     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3271     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3272     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3273     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3274     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3275     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3276     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3277     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3278     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3279   }
3280 #endif
3281 
3282   /* assemble new additional is (if any) */
3283   if (nis) {
3284     PetscInt **temp_idxs,*count_is,j,psum;
3285 
3286     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3287     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3288     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3289     ptr_idxs = recv_buffer_idxs_is;
3290     psum = 0;
3291     for (i=0;i<n_recvs;i++) {
3292       for (j=0;j<nis;j++) {
3293         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3294         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3295         psum += plen;
3296         ptr_idxs += plen+1; /* shift pointer to received data */
3297       }
3298     }
3299     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3300     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3301     for (i=1;i<nis;i++) {
3302       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3303     }
3304     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3305     ptr_idxs = recv_buffer_idxs_is;
3306     for (i=0;i<n_recvs;i++) {
3307       for (j=0;j<nis;j++) {
3308         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3309         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3310         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3311         ptr_idxs += plen+1; /* shift pointer to received data */
3312       }
3313     }
3314     for (i=0;i<nis;i++) {
3315       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3316       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3317       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3318     }
3319     ierr = PetscFree(count_is);CHKERRQ(ierr);
3320     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3321     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3322   }
3323   /* free workspace */
3324   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3325   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3326   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3327   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3328   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3329   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3330   if (isdense) {
3331     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3332     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3333   } else {
3334     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3335   }
3336   if (nis) {
3337     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3338     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3339   }
3340   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3341   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3342   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3343   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3344   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3345   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3346   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3347   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3348   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3349   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3350   ierr = PetscFree(onodes);CHKERRQ(ierr);
3351   if (nis) {
3352     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3353     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3354     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3355   }
3356   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3357   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3358     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3359     for (i=0;i<nis;i++) {
3360       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3361     }
3362   }
3363   PetscFunctionReturn(0);
3364 }
3365 
3366 /* temporary hack into ksp private data structure */
3367 #include <petsc-private/kspimpl.h>
3368 
3369 #undef __FUNCT__
3370 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3371 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3372 {
3373   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3374   PC_IS                  *pcis = (PC_IS*)pc->data;
3375   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3376   MatNullSpace           CoarseNullSpace=NULL;
3377   ISLocalToGlobalMapping coarse_islg;
3378   IS                     coarse_is;
3379   PetscInt               max_it;
3380   PetscInt               im_active=-1,active_procs=-1;
3381   PC                     pc_temp;
3382   PCType                 coarse_pc_type;
3383   KSPType                coarse_ksp_type;
3384   PetscBool              multilevel_requested,multilevel_allowed;
3385   PetscBool              setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse;
3386   MatStructure           matstruct;
3387   PetscErrorCode         ierr;
3388 
3389   PetscFunctionBegin;
3390   /* Assign global numbering to coarse dofs */
3391   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3392     PetscInt ocoarse_size;
3393     ocoarse_size = pcbddc->coarse_size;
3394     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3395     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3396     /* see if we can avoid some work */
3397     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3398       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3399         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3400         coarse_reuse = PETSC_FALSE;
3401       } else { /* we can safely reuse already computed coarse matrix */
3402         coarse_reuse = PETSC_TRUE;
3403       }
3404     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3405       coarse_reuse = PETSC_FALSE;
3406     }
3407     /* reset any subassembling information */
3408     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3409   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3410     coarse_reuse = PETSC_TRUE;
3411   }
3412 
3413   /* test if we can go multilevel */
3414   multilevel_allowed = PETSC_FALSE;
3415   multilevel_requested = PETSC_FALSE;
3416   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3417   if (multilevel_requested) {
3418     /* count "active processes" */
3419     im_active = !!(pcis->n);
3420     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3421     if (active_procs/pcbddc->coarsening_ratio < 2) {
3422       multilevel_allowed = PETSC_FALSE;
3423     } else {
3424       multilevel_allowed = PETSC_TRUE;
3425     }
3426   }
3427 
3428   /* creates temporary l2gmap and IS for coarse indexes */
3429   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3430   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3431 
3432   /* creates temporary MATIS object for coarse matrix */
3433   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3434   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3435   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3436   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3437   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3438   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3439 
3440   /* set defaults for coarse KSP and PC */
3441   if (multilevel_allowed) {
3442     coarse_ksp_type = KSPRICHARDSON;
3443     coarse_pc_type = PCBDDC;
3444   } else {
3445     coarse_ksp_type = KSPPREONLY;
3446     coarse_pc_type = PCREDUNDANT;
3447   }
3448 
3449   /* create the coarse KSP object only once with defaults */
3450   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3451   if (!pcbddc->coarse_ksp) {
3452     char prefix[256],str_level[16];
3453     size_t len;
3454     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3455     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3456     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3457     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr);
3458     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3459     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3460     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3461     /* prefix */
3462     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3463     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3464     if (!pcbddc->current_level) {
3465       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3466       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3467     } else {
3468       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3469       if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3470       if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3471       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3472       *(prefix+len)='\0';
3473       sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3474       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3475     }
3476     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3477     /* allow user customization */
3478     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3479   }
3480 
3481   /* get some info after set from options */
3482   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3483   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3484   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3485   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3486     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3487     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3488     isbddc = PETSC_FALSE;
3489   }
3490 
3491   /* propagate BDDC info to the next level */
3492   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3493   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3494   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3495   if (isbddc) { /* protects from unneded computations */
3496     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3497     const PetscInt         *idxs;
3498     IS                     *c_isfordofs,c_isneumann;
3499     ISLocalToGlobalMapping tmap;
3500 
3501     /* create map between primal indices (in local representative ordering) and local subdomain numbering */
3502     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3503     /* allocate space for temporary storage */
3504     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3505     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3506     /* dofs splitting */
3507     ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr);
3508     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3509       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3510       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3511       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3512       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3513       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3514       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3515       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr);
3516       /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */
3517     }
3518     ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr);
3519     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3520       ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr);
3521     }
3522     ierr = PetscFree(c_isfordofs);CHKERRQ(ierr);
3523     /* neumann boundaries */
3524     if (pcbddc->NeumannBoundariesLocal) {
3525       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3526       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3527       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3528       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3529       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3530       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3531       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr);
3532       /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */
3533       ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr);
3534     }
3535     ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr);
3536     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3537     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3538     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3539   }
3540 
3541   /* destroy no longer needed map */
3542   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3543 
3544   /* assemble coarse matrix */
3545   if (isbddc || isnn) {
3546     if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3547       ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3548       if (pcbddc->dbg_flag) {
3549         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3550         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3551         ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr);
3552       }
3553     }
3554     if (coarse_reuse) {
3555       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3556       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3557       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,MAT_REUSE_MATRIX,&coarse_mat,0,NULL);CHKERRQ(ierr);
3558     } else {
3559       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat,0,NULL);CHKERRQ(ierr);
3560     }
3561   } else {
3562     if (coarse_reuse) {
3563       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3564       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3565       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3566     } else {
3567       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3568     }
3569   }
3570   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3571 
3572   /* create local to global scatters for coarse problem */
3573   if (pcbddc->new_primal_space) {
3574     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3575     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3576     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3577     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3578     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3579   }
3580   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3581 
3582   /* propagate symmetry info to coarse matrix */
3583   issym = PETSC_FALSE;
3584   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3585   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3586   isherm = PETSC_FALSE;
3587   ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr);
3588   ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3589 
3590 /* temporary disabled code */
3591 #if 0
3592   /* Compute coarse null space (special handling by BDDC only) */
3593   if (pcbddc->NullSpace) {
3594     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3595     if (isbddc) {
3596       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3597     } else {
3598       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3599     }
3600   }
3601 #endif
3602 
3603   /* set operators */
3604   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3605 
3606   /* additional KSP customization */
3607   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3608   if (max_it < 5) {
3609     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3610   }
3611 
3612   /* print some info if requested */
3613   if (pcbddc->dbg_flag) {
3614     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3615     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3616     if (!multilevel_allowed) {
3617       if (multilevel_requested) {
3618         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3619       } else if (pcbddc->max_levels) {
3620         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3621       }
3622     }
3623     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3624     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3625   }
3626 
3627   /* setup coarse ksp */
3628   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3629   /* hack -> use a DM on coarse_ksp to remove this hack */
3630   ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_rhs),&((pcbddc->coarse_ksp)->vec_sol));CHKERRQ(ierr);
3631 
3632   /* Check coarse problem if in debug mode or if solving with an iterative method */
3633   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3634   if (pcbddc->dbg_flag || !ispreonly) {
3635     KSP       check_ksp;
3636     KSPType   check_ksp_type;
3637     PC        check_pc;
3638     Vec       check_vec;
3639     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3640     PetscInt  its;
3641     PetscBool compute;
3642 
3643     /* Create ksp object suitable for estimation of extreme eigenvalues */
3644     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3645     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3646     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3647     if (ispreonly) {
3648       check_ksp_type = KSPPREONLY;
3649       compute = PETSC_FALSE;
3650     } else {
3651       check_ksp_type = KSPGMRES;
3652       compute = PETSC_TRUE;
3653     }
3654     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3655     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3656     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3657     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3658     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3659     /* create random vec */
3660     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3661     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3662     if (CoarseNullSpace) {
3663       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3664     }
3665     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3666     /* solve coarse problem */
3667     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3668     if (CoarseNullSpace) {
3669       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3670     }
3671     /* set eigenvalue estimation if preonly has not been requested */
3672     if (!ispreonly) {
3673       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3674       /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */
3675       ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3676       ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3677     }
3678     /* check coarse problem residual error */
3679     if (pcbddc->dbg_flag) {
3680       ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3681       ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3682       ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3683       ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3684       ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3685       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3686       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3687       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3688       if (!ispreonly) {
3689         ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3690         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3691       }
3692       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3693     }
3694     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3695   }
3696 
3697   /* print additional info */
3698   if (pcbddc->dbg_flag) {
3699     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3700     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3701     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3702     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3703   }
3704 
3705   /* free memory */
3706   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3707   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3708   PetscFunctionReturn(0);
3709 }
3710 
3711 #undef __FUNCT__
3712 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3713 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3714 {
3715   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3716   PC_IS*         pcis = (PC_IS*)pc->data;
3717   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3718   PetscInt       i,coarse_size;
3719   PetscInt       *local_primal_indices;
3720   PetscErrorCode ierr;
3721 
3722   PetscFunctionBegin;
3723   /* Compute global number of coarse dofs */
3724   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3725     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3726   }
3727   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3728 
3729   /* check numbering */
3730   if (pcbddc->dbg_flag) {
3731     PetscScalar coarsesum,*array;
3732 
3733     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3734     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3735     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3736     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3737     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3738     for (i=0;i<pcbddc->local_primal_size;i++) {
3739       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3740     }
3741     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3742     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3743     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3744     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3745     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3746     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3747     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3748     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3749     for (i=0;i<pcis->n;i++) {
3750       if (array[i] == 1.0) {
3751         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3752       }
3753     }
3754     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3755     for (i=0;i<pcis->n;i++) {
3756       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3757     }
3758     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3759     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3760     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3761     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3762     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3763     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3764     if (pcbddc->dbg_flag > 1) {
3765       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3766       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3767       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3768       for (i=0;i<pcbddc->local_primal_size;i++) {
3769         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3770       }
3771       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3772     }
3773     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3774   }
3775   /* get back data */
3776   *coarse_size_n = coarse_size;
3777   *local_primal_indices_n = local_primal_indices;
3778   PetscFunctionReturn(0);
3779 }
3780 
3781 #undef __FUNCT__
3782 #define __FUNCT__ "PCBDDCGlobalToLocal"
3783 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3784 {
3785   PC_IS*         pcis = (PC_IS*)pc->data;
3786   IS             localis_t;
3787   PetscInt       i,lsize,*idxs;
3788   PetscScalar    *vals;
3789   PetscErrorCode ierr;
3790 
3791   PetscFunctionBegin;
3792   /* get dirichlet indices in local ordering exploiting local to global map */
3793   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3794   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3795   for (i=0;i<lsize;i++) vals[i] = 1.0;
3796   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3797   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3798   if (idxs) { /* multilevel guard */
3799     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3800   }
3801   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3802   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3803   ierr = PetscFree(vals);CHKERRQ(ierr);
3804   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3805   /* now compute dirichlet set in local ordering */
3806   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3807   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3808   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3809   for (i=0,lsize=0;i<pcis->n;i++) {
3810     if (vals[i] == 1.0) {
3811       lsize++;
3812     }
3813   }
3814   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3815   for (i=0,lsize=0;i<pcis->n;i++) {
3816     if (vals[i] == 1.0) {
3817       idxs[lsize++] = i;
3818     }
3819   }
3820   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3821   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3822   *localis = localis_t;
3823   PetscFunctionReturn(0);
3824 }
3825