xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision fa7f1dd8e278332e434e1b3656065318d124c448)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
23 
24   /* Change global null space passed in by the user if change of basis has been requested */
25   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
26     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
27   }
28 
29   /*
30      Setup local correction and local part of coarse basis.
31      Gives back the dense local part of the coarse matrix in column major ordering
32   */
33   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
34 
35   /* Compute total number of coarse nodes and setup coarse solver */
36   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
37 
38   /* free */
39   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "PCBDDCResetCustomization"
45 PetscErrorCode PCBDDCResetCustomization(PC pc)
46 {
47   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
48   PetscInt       i;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
57   for (i=0;i<pcbddc->n_ISForDofs;i++) {
58     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
59   }
60   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
61   pcbddc->n_ISForDofs = 0;
62   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
63   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
117 {
118   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
119   PC_IS          *pcis = (PC_IS*)pc->data;
120   VecType        impVecType;
121   PetscInt       n_vertices,n_constraints,local_primal_size,n_R,old_size;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   if (!pcbddc->ConstraintMatrix) {
126     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
127   }
128   /* get sizes */
129   ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr);
130   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
131   n_constraints = local_primal_size - n_vertices;
132   n_R = pcis->n-n_vertices;
133   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
134   /* local work vectors (try to avoid unneeded work)*/
135   /* R nodes */
136   old_size = -1;
137   if (pcbddc->vec1_R) {
138     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
139   }
140   if (n_R != old_size) {
141     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
143     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
144     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
145     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
146     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
147   }
148   /* local primal dofs */
149   old_size = -1;
150   if (pcbddc->vec1_P) {
151     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
152   }
153   if (local_primal_size != old_size) {
154     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
157     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
158   }
159   /* local explicit constraints */
160   old_size = -1;
161   if (pcbddc->vec1_C) {
162     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
163   }
164   if (n_constraints && n_constraints != old_size) {
165     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
168     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "PCBDDCSetUpCorrection"
175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176 {
177   PetscErrorCode         ierr;
178   /* pointers to pcis and pcbddc */
179   PC_IS*                 pcis = (PC_IS*)pc->data;
180   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
181   /* submatrices of local problem */
182   Mat                    A_RV,A_VR,A_VV;
183   /* working matrices */
184   Mat                    M1,M2,M3,C_CR;
185   /* working vectors */
186   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
187   /* additional working stuff */
188   IS                     is_aux;
189   ISLocalToGlobalMapping BtoNmap;
190   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
191   const PetscScalar      *array,*row_cmat_values;
192   const PetscInt         *row_cmat_indices,*idx_R_local;
193   PetscInt               *vertices,*idx_V_B,*auxindices;
194   PetscInt               n_vertices,n_constraints,size_of_constraint;
195   PetscInt               i,j,n_R,n_D,n_B;
196   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
197   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
198   MatType                impMatType;
199   /* some shortcuts to scalars */
200   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
201   /* for debugging purposes */
202   PetscReal              *coarsefunctions_errors,*constraints_errors;
203 
204   PetscFunctionBegin;
205   /* get number of vertices and their local indices */
206   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
207   n_constraints = pcbddc->local_primal_size-n_vertices;
208   /* Set Non-overlapping dimensions */
209   n_B = pcis->n_B; n_D = pcis->n - n_B;
210   n_R = pcis->n-n_vertices;
211 
212   /* Set types for local objects needed by BDDC precondtioner */
213   impMatType = MATSEQDENSE;
214 
215   /* Allocating some extra storage just to be safe */
216   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
217   for (i=0;i<pcis->n;i++) auxindices[i]=i;
218 
219   /* vertices in boundary numbering */
220   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
221   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
222   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
223   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
224   if (i != n_vertices) {
225     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
226   }
227 
228   /* Precompute stuffs needed for preprocessing and application of BDDC*/
229   if (n_constraints) {
230     /* see if we can save some allocations */
231     if (pcbddc->local_auxmat2) {
232       PetscInt on_R,on_constraints;
233       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
234       if (on_R != n_R || on_constraints != n_constraints) {
235         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
236         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
237       }
238     }
239     /* work vectors */
240     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
241     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
242     /* auxiliary matrices */
243     if (!pcbddc->local_auxmat2) {
244       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
245       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
246       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
247       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
248     }
249 
250     /* Extract constraints on R nodes: C_{CR}  */
251     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
252     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
253     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
254 
255     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
256     for (i=0;i<n_constraints;i++) {
257       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
258       /* Get row of constraint matrix in R numbering */
259       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
260       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
261       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
262       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
263       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
264       /* Solve for row of constraint matrix in R numbering */
265       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
266       /* Set values in local_auxmat2 */
267       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
268       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
269       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
270     }
271     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
274 
275     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
276     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
277     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
278     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
279     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
280     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
281     ierr = MatSetUp(M1);CHKERRQ(ierr);
282     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
283     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
284     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
285     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
286     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
287     ierr = MatDestroy(&M2);CHKERRQ(ierr);
288     ierr = MatDestroy(&M3);CHKERRQ(ierr);
289     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
290     if (!pcbddc->local_auxmat1) {
291       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
292     } else {
293       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
294     }
295   }
296 
297   /* Get submatrices from subdomain matrix */
298   if (n_vertices) {
299     PetscInt ibs,mbs;
300     PetscBool issbaij;
301     Mat newmat;
302 
303     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
304     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
305     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
306     if (ibs != mbs) { /* need to convert to SEQAIJ */
307       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
311       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
312     } else {
313       /* this is safe */
314       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
315       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
316       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
317         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
318         /* which of the two approaches is faster? */
319         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
320         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
321         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
322         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
323         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
324       } else {
325         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
326         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
327       }
328     }
329     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
330     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
331     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
332   }
333 
334   /* Matrix of coarse basis functions (local) */
335   if (pcbddc->coarse_phi_B) {
336     PetscInt on_B,on_primal;
337     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
338     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
339       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
340       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
341     }
342   }
343   if (pcbddc->coarse_phi_D) {
344     PetscInt on_D,on_primal;
345     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
346     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
347       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
348       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
349     }
350   }
351   if (!pcbddc->coarse_phi_B) {
352     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
353     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
354     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
355     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
356   }
357   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
358     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
359     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
360     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
361     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
362   }
363 
364   if (pcbddc->dbg_flag) {
365     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
366     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
367     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
368   }
369   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
370   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
371 
372   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
373 
374   /* vertices */
375   for (i=0;i<n_vertices;i++) {
376     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
377     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
378     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
379     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
380     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
381     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
382     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
383     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
384     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
385     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
386     if (n_constraints) {
387       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
388       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
389       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
390     }
391     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
392     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
393 
394     /* Set values in coarse basis function and subdomain part of coarse_mat */
395     /* coarse basis functions */
396     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
397     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
399     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
400     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
401     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
402     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
403     if (pcbddc->switch_static || pcbddc->dbg_flag) {
404       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
405       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
408       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
409     }
410     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
411     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
412     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
414     if (n_constraints) {
415       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
416       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
417       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
418     }
419 
420     /* check */
421     if (pcbddc->dbg_flag) {
422       /* assemble subdomain vector on local nodes */
423       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
424       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
427       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
430       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
431       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
432       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
433       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
434       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
435       if (n_constraints) {
436         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
437         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
438         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
439       }
440       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
443       /* check saddle point solution */
444       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
447       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
448       /* shift by the identity matrix */
449       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
450       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
453     }
454   }
455 
456   /* constraints */
457   for (i=0;i<n_constraints;i++) {
458     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
459     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
460     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
461     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
462     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
463     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
464     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
465     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
466     if (n_vertices) {
467       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
468     }
469     /* Set values in coarse basis function and subdomain part of coarse_mat */
470     /* coarse basis functions */
471     j = i+n_vertices; /* don't touch this! */
472     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
473     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
476     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
477     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
478     if (pcbddc->switch_static || pcbddc->dbg_flag) {
479       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
481       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
482       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
483       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
484     }
485     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
486     if (n_vertices) {
487       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
488       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
489       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
490     }
491     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
492     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
493     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
494 
495     if (pcbddc->dbg_flag) {
496       /* assemble subdomain vector on nodes */
497       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
498       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
499       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
500       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
501       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
502       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
503       /* assemble subdomain vector of lagrange multipliers */
504       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
505       if (n_vertices) {
506         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
507         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
509       }
510       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
511       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
513       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
514       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
515       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
516       /* check saddle point solution */
517       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
518       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
519       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
520       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
521       /* shift by the identity matrix */
522       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
523       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
524       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
525       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
526     }
527   }
528   /* call assembling routines for local coarse basis */
529   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   if (pcbddc->switch_static || pcbddc->dbg_flag) {
532     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534   }
535 
536   /* compute other basis functions for non-symmetric problems */
537   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
538   if (!setsym || (setsym && !issym)) {
539     if (!pcbddc->coarse_psi_B) {
540       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
541       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
542       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
543       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
544     }
545     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
546       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
547       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
548       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
549       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
550     }
551     for (i=0;i<pcbddc->local_primal_size;i++) {
552       if (n_constraints) {
553         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
554         for (j=0;j<n_constraints;j++) {
555           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
556         }
557         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
558         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
559       }
560       if (i<n_vertices) {
561         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
562         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
563         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
564         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
565         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
566         if (n_constraints) {
567           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
568         }
569       } else {
570         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
571       }
572       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
573       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
574       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
575       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
576       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
577       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
578       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
579       if (i<n_vertices) {
580         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
581       }
582       if (pcbddc->switch_static || pcbddc->dbg_flag) {
583         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
584         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
585         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
586         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
587         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
588       }
589 
590       if (pcbddc->dbg_flag) {
591         /* assemble subdomain vector on nodes */
592         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
593         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
594         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
595         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
596         if (i<n_vertices) {
597           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
598         }
599         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
600         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
601         /* assemble subdomain vector of lagrange multipliers */
602         for (j=0;j<pcbddc->local_primal_size;j++) {
603           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
604         }
605         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
606         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
607         /* check saddle point solution */
608         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
609         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
610         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
611         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
612         /* shift by the identity matrix */
613         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
614         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
615         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
616         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
617       }
618     }
619     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     if (pcbddc->switch_static || pcbddc->dbg_flag) {
622       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
623       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
624     }
625   }
626   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
627   /* Checking coarse_sub_mat and coarse basis functios */
628   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
629   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
630   if (pcbddc->dbg_flag) {
631     Mat         coarse_sub_mat;
632     Mat         AUXMAT,TM1,TM2,TM3,TM4;
633     Mat         coarse_phi_D,coarse_phi_B;
634     Mat         coarse_psi_D,coarse_psi_B;
635     Mat         A_II,A_BB,A_IB,A_BI;
636     MatType     checkmattype=MATSEQAIJ;
637     PetscReal   real_value;
638 
639     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
640     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
641     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
642     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
643     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
644     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
645     if (pcbddc->coarse_psi_B) {
646       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
647       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
648     }
649     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
650 
651     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
652     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
653     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
654     if (pcbddc->coarse_psi_B) {
655       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
656       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
657       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
658       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
659       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
660       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
661       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
662       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
663       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
664       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
665       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
666       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
667     } else {
668       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
669       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
670       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
671       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
672       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
673       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
674       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
675       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
676     }
677     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
679     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
680     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
681     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
682     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
684     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
685     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
686     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
687     for (i=0;i<pcbddc->local_primal_size;i++) {
688       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
689     }
690     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
691     for (i=0;i<pcbddc->local_primal_size;i++) {
692       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
693     }
694     if (pcbddc->coarse_psi_B) {
695       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
696       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
697         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
698       }
699       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
700       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
701         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
702       }
703     }
704     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
706     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
707     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
708     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
710     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
711     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
712     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
713     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
714     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
715     if (pcbddc->coarse_psi_B) {
716       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
717       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
718     }
719     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
720     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
721     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
722     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
723   }
724   /* free memory */
725   if (n_vertices) {
726     ierr = PetscFree(vertices);CHKERRQ(ierr);
727     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
728     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
731     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
732   }
733   if (n_constraints) {
734     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
735     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
736     ierr = MatDestroy(&M1);CHKERRQ(ierr);
737     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
738   }
739   ierr = PetscFree(auxindices);CHKERRQ(ierr);
740   /* get back data */
741   *coarse_submat_vals_n = coarse_submat_vals;
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
747 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
748 {
749   PC_IS*            pcis = (PC_IS*)(pc->data);
750   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
751   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
752   PetscBool         issbaij,isbaij;
753   /* manage repeated solves */
754   MatReuse          reuse;
755   MatStructure      matstruct;
756   PetscErrorCode    ierr;
757 
758   PetscFunctionBegin;
759   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
760     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
761   }
762   /* get mat flags */
763   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
764   reuse = MAT_INITIAL_MATRIX;
765   if (pc->setupcalled) {
766     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
767     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
768     if (matstruct == SAME_NONZERO_PATTERN) {
769       reuse = MAT_REUSE_MATRIX;
770     } else {
771       reuse = MAT_INITIAL_MATRIX;
772     }
773   }
774   if (reuse == MAT_INITIAL_MATRIX) {
775     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
776     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
777     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
778     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
779     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
780   }
781 
782   /* transform local matrices if needed */
783   if (pcbddc->use_change_of_basis) {
784     Mat         change_mat_all;
785     PetscScalar *row_cmat_values;
786     PetscInt    *row_cmat_indices;
787     PetscInt    *nnz,*is_indices,*temp_indices;
788     PetscInt    i,j,k,n_D,n_B;
789 
790     /* Get Non-overlapping dimensions */
791     n_B = pcis->n_B;
792     n_D = pcis->n-n_B;
793 
794     /* compute nonzero structure of change of basis on all local nodes */
795     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
796     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
797     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
798     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
799     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
800     k=1;
801     for (i=0;i<n_B;i++) {
802       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
803       nnz[is_indices[i]]=j;
804       if (k < j) k = j;
805       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
806     }
807     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
808     /* assemble change of basis matrix on the whole set of local dofs */
809     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
810     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
811     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
812     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
813     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
814     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
815     for (i=0;i<n_D;i++) {
816       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
817     }
818     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
819     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
820     for (i=0;i<n_B;i++) {
821       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
822       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
823       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
824       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
825     }
826     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
827     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
828     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
829     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
830     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
831     if (!issbaij && !isbaij) {
832       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
833     } else {
834       Mat work_mat;
835       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
836       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
837       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
838     }
839     /*
840     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
841     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
842     */
843     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
844     ierr = PetscFree(nnz);CHKERRQ(ierr);
845     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
846   } else {
847     /* without change of basis, the local matrix is unchanged */
848     if (!pcbddc->local_mat) {
849       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
850       pcbddc->local_mat = matis->A;
851     }
852   }
853 
854   /* get submatrices */
855   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
856   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
857   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
858   if (!issbaij) {
859     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
860     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
861   } else {
862     Mat newmat;
863     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
864     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
865     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
866     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
867   }
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
873 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
874 {
875   PC_IS*         pcis = (PC_IS*)(pc->data);
876   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
877   IS             is_aux1,is_aux2;
878   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
879   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
880   PetscInt       vbs,bs;
881   PetscBT        bitmask;
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885   /* No need to setup local scatters if primal space is unchanged */
886   if (!pcbddc->new_primal_space_local) {
887     PetscFunctionReturn(0);
888   }
889   /* destroy old objects */
890   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
891   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
892   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
893   /* Set Non-overlapping dimensions */
894   n_B = pcis->n_B; n_D = pcis->n - n_B;
895   /* get vertex indices from constraint matrix */
896   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
897 
898   /* create auxiliary bitmask */
899   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
900   for (i=0;i<n_vertices;i++) {
901     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
902   }
903 
904   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
905   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
906   for (i=0, n_R=0; i<pcis->n; i++) {
907     if (!PetscBTLookup(bitmask,i)) {
908       idx_R_local[n_R] = i;
909       n_R++;
910     }
911   }
912 
913   /* Block code */
914   vbs = 1;
915   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
916   if (bs>1 && !(n_vertices%bs)) {
917     PetscBool is_blocked = PETSC_TRUE;
918     PetscInt  *vary;
919     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
920     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
921     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
922     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
923     for (i=0; i<n_vertices; i++) {
924       if (vary[i]!=0 && vary[i]!=bs) {
925         is_blocked = PETSC_FALSE;
926         break;
927       }
928     }
929     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
930       vbs = bs;
931       for (i=0;i<n_R/vbs;i++) {
932         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
933       }
934     }
935     ierr = PetscFree(vary);CHKERRQ(ierr);
936   }
937   ierr = PetscFree(vertices);CHKERRQ(ierr);
938   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
939   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
940 
941   /* print some info if requested */
942   if (pcbddc->dbg_flag) {
943     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
944     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
947     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
948     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
949     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
950   }
951 
952   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
953   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
954   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
955   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
956   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
957   for (i=0; i<n_D; i++) {
958     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
959   }
960   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
961   for (i=0, j=0; i<n_R; i++) {
962     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
963       aux_array1[j++] = i;
964     }
965   }
966   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
967   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   for (i=0, j=0; i<n_B; i++) {
969     if (!PetscBTLookup(bitmask,is_indices[i])) {
970       aux_array2[j++] = i;
971     }
972   }
973   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
974   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
975   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
976   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
977   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
978 
979   if (pcbddc->switch_static || pcbddc->dbg_flag) {
980     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
981     for (i=0, j=0; i<n_R; i++) {
982       if (PetscBTLookup(bitmask,idx_R_local[i])) {
983         aux_array1[j++] = i;
984       }
985     }
986     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
987     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
988     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
989   }
990   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
991   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
998 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
999 {
1000   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1001   PC_IS          *pcis = (PC_IS*)pc->data;
1002   PC             pc_temp;
1003   Mat            A_RR;
1004   Vec            vec1,vec2,vec3;
1005   MatStructure   matstruct;
1006   MatReuse       reuse;
1007   PetscScalar    m_one = -1.0;
1008   PetscReal      value;
1009   PetscInt       n_D,n_R,ibs,mbs;
1010   PetscBool      use_exact,use_exact_reduced,issbaij;
1011   PetscErrorCode ierr;
1012   /* prefixes stuff */
1013   char           dir_prefix[256],neu_prefix[256],str_level[3];
1014   size_t         len;
1015 
1016   PetscFunctionBegin;
1017   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1018 
1019   /* compute prefixes */
1020   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1021   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1022   if (!pcbddc->current_level) {
1023     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1024     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1025     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1026     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1027   } else {
1028     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1029     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1030     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1031     len -= 15; /* remove "pc_bddc_coarse_" */
1032     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1033     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1034     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1035     *(dir_prefix+len)='\0';
1036     *(neu_prefix+len)='\0';
1037     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1038     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1039     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1040     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1041   }
1042 
1043   /* DIRICHLET PROBLEM */
1044   /* Matrix for Dirichlet problem is pcis->A_II */
1045   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1046   if (!pcbddc->ksp_D) { /* create object if not yet build */
1047     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1048     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1049     /* default */
1050     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1051     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1052     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1053     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1054     if (issbaij) {
1055       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1056     } else {
1057       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1058     }
1059     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1060   }
1061   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1062   /* Allow user's customization */
1063   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1064   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1065   if (!n_D) {
1066     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1067     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1068   }
1069   /* Set Up KSP for Dirichlet problem of BDDC */
1070   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1071   /* set ksp_D into pcis data */
1072   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1073   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1074   pcis->ksp_D = pcbddc->ksp_D;
1075 
1076   /* NEUMANN PROBLEM */
1077   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1078   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1079   if (pcbddc->ksp_R) { /* already created ksp */
1080     PetscInt nn_R;
1081     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1082     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1083     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1084     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1085       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1086       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1087       reuse = MAT_INITIAL_MATRIX;
1088     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1089       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1090         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1091         reuse = MAT_INITIAL_MATRIX;
1092       } else { /* safe to reuse the matrix */
1093         reuse = MAT_REUSE_MATRIX;
1094       }
1095     }
1096     /* last check */
1097     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1098       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099       reuse = MAT_INITIAL_MATRIX;
1100     }
1101   } else { /* first time, so we need to create the matrix */
1102     reuse = MAT_INITIAL_MATRIX;
1103   }
1104   /* extract A_RR */
1105   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1106   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1107   if (ibs != mbs) {
1108     Mat newmat;
1109     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1110     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1111     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1112   } else {
1113     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1114   }
1115   if (!pcbddc->ksp_R) { /* create object if not present */
1116     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1117     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1118     /* default */
1119     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1120     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1121     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1122     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1123     if (issbaij) {
1124       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1125     } else {
1126       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1127     }
1128     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1129   }
1130   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1131   /* Allow user's customization */
1132   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1133   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1134   if (!n_R) {
1135     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1136     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1137   }
1138   /* Set Up KSP for Neumann problem of BDDC */
1139   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1140 
1141   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1142 
1143   /* Dirichlet */
1144   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1145   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1146   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1147   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1148   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1149   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1150   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1151   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1152   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1153   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1154   /* need to be adapted? */
1155   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1156   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1157   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1158   /* print info */
1159   if (pcbddc->dbg_flag) {
1160     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1161     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1162     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1163     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1164     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1165   }
1166   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1167     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1168   }
1169 
1170   /* Neumann */
1171   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1172   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1173   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1174   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1175   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1176   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1177   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1178   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1179   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1180   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1181   /* need to be adapted? */
1182   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1183   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1184   /* print info */
1185   if (pcbddc->dbg_flag) {
1186     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1187     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1188   }
1189   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1190     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1191   }
1192   /* free Neumann problem's matrix */
1193   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1199 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1200 {
1201   PetscErrorCode ierr;
1202   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1203 
1204   PetscFunctionBegin;
1205   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1206   if (pcbddc->local_auxmat1) {
1207     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1208     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1215 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1216 {
1217   PetscErrorCode ierr;
1218   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1219   PC_IS*            pcis = (PC_IS*)  (pc->data);
1220   const PetscScalar zero = 0.0;
1221 
1222   PetscFunctionBegin;
1223   /* Application of PHI^T (or PSI^T)  */
1224   if (pcbddc->coarse_psi_B) {
1225     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1226     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1227   } else {
1228     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1229     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1230   }
1231   /* Scatter data of coarse_rhs */
1232   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1233   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234 
1235   /* Local solution on R nodes */
1236   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1237   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1239   if (pcbddc->switch_static) {
1240     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1241     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1242   }
1243   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1244   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1245   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247   if (pcbddc->switch_static) {
1248     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250   }
1251 
1252   /* Coarse solution */
1253   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1255     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1256   }
1257   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1259 
1260   /* Sum contributions from two levels */
1261   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1262   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1268 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1269 {
1270   PetscErrorCode ierr;
1271   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1272 
1273   PetscFunctionBegin;
1274   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1280 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1281 {
1282   PetscErrorCode ierr;
1283   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1284 
1285   PetscFunctionBegin;
1286   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 /* uncomment for testing purposes */
1291 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1292 #undef __FUNCT__
1293 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1294 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1295 {
1296   PetscErrorCode    ierr;
1297   PC_IS*            pcis = (PC_IS*)(pc->data);
1298   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1299   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1300   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1301   MatType           impMatType=MATSEQAIJ;
1302   /* one and zero */
1303   PetscScalar       one=1.0,zero=0.0;
1304   /* space to store constraints and their local indices */
1305   PetscScalar       *temp_quadrature_constraint;
1306   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1307   /* iterators */
1308   PetscInt          i,j,k,total_counts,temp_start_ptr;
1309   /* stuff to store connected components stored in pcbddc->mat_graph */
1310   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1311   PetscInt          n_ISForFaces,n_ISForEdges;
1312   /* near null space stuff */
1313   MatNullSpace      nearnullsp;
1314   const Vec         *nearnullvecs;
1315   Vec               *localnearnullsp;
1316   PetscBool         nnsp_has_cnst;
1317   PetscInt          nnsp_size;
1318   PetscScalar       *array;
1319   /* BLAS integers */
1320   PetscBLASInt      lwork,lierr;
1321   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1322   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1323   /* LAPACK working arrays for SVD or POD */
1324   PetscBool         skip_lapack;
1325   PetscScalar       *work;
1326   PetscReal         *singular_vals;
1327 #if defined(PETSC_USE_COMPLEX)
1328   PetscReal         *rwork;
1329 #endif
1330 #if defined(PETSC_MISSING_LAPACK_GESVD)
1331   PetscBLASInt      Blas_one_2=1;
1332   PetscScalar       *temp_basis,*correlation_mat;
1333 #else
1334   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1335   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1336 #endif
1337   /* reuse */
1338   PetscInt          olocal_primal_size;
1339   PetscInt          *oprimal_indices_local_idxs;
1340   /* change of basis */
1341   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1342   PetscBool         boolforchange,qr_needed;
1343   PetscBT           touched,change_basis,qr_needed_idx;
1344   /* auxiliary stuff */
1345   PetscInt          *nnz,*is_indices,*local_to_B;
1346   /* some quantities */
1347   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1348   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1349 
1350 
1351   PetscFunctionBegin;
1352   /* Destroy Mat objects computed previously */
1353   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1354   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1355   /* Get index sets for faces, edges and vertices from graph */
1356   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1357     pcbddc->use_vertices = PETSC_TRUE;
1358   }
1359   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1360   /* HACK: provide functions to set change of basis */
1361   if (!ISForVertices && pcbddc->NullSpace) {
1362     pcbddc->use_change_of_basis = PETSC_TRUE;
1363     pcbddc->use_change_on_faces = PETSC_FALSE;
1364   }
1365   /* print some info */
1366   if (pcbddc->dbg_flag) {
1367     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1368     i = 0;
1369     if (ISForVertices) {
1370       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1371     }
1372     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1373     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1374     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1375     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1376   }
1377   /* check if near null space is attached to global mat */
1378   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1379   if (nearnullsp) {
1380     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1381     /* remove any stored info */
1382     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1383     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1384     /* store information for BDDC solver reuse */
1385     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1386     pcbddc->onearnullspace = nearnullsp;
1387     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1388     for (i=0;i<nnsp_size;i++) {
1389       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1390     }
1391   } else { /* if near null space is not provided BDDC uses constants by default */
1392     nnsp_size = 0;
1393     nnsp_has_cnst = PETSC_TRUE;
1394   }
1395   /* get max number of constraints on a single cc */
1396   max_constraints = nnsp_size;
1397   if (nnsp_has_cnst) max_constraints++;
1398 
1399   /*
1400        Evaluate maximum storage size needed by the procedure
1401        - temp_indices will contain start index of each constraint stored as follows
1402        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1403        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1404        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1405                                                                                                                                                          */
1406   total_counts = n_ISForFaces+n_ISForEdges;
1407   total_counts *= max_constraints;
1408   n_vertices = 0;
1409   if (ISForVertices) {
1410     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1411   }
1412   total_counts += n_vertices;
1413   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1414   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1415   total_counts = 0;
1416   max_size_of_constraint = 0;
1417   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1418     if (i<n_ISForEdges) {
1419       used_IS = &ISForEdges[i];
1420     } else {
1421       used_IS = &ISForFaces[i-n_ISForEdges];
1422     }
1423     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1424     total_counts += j;
1425     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1426   }
1427   total_counts *= max_constraints;
1428   total_counts += n_vertices;
1429   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1430   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1431   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1432   /* local to boundary numbering */
1433   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1434   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1435   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1436   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1437   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1438   /* get local part of global near null space vectors */
1439   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1440   for (k=0;k<nnsp_size;k++) {
1441     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1442     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1444   }
1445 
1446   /* whether or not to skip lapack calls */
1447   skip_lapack = PETSC_TRUE;
1448   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1449 
1450   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1451   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1452     PetscScalar temp_work;
1453 #if defined(PETSC_MISSING_LAPACK_GESVD)
1454     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1455     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1456     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1457     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1458 #if defined(PETSC_USE_COMPLEX)
1459     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1460 #endif
1461     /* now we evaluate the optimal workspace using query with lwork=-1 */
1462     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1463     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1464     lwork = -1;
1465     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1466 #if !defined(PETSC_USE_COMPLEX)
1467     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1468 #else
1469     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1470 #endif
1471     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1472     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1473 #else /* on missing GESVD */
1474     /* SVD */
1475     PetscInt max_n,min_n;
1476     max_n = max_size_of_constraint;
1477     min_n = max_constraints;
1478     if (max_size_of_constraint < max_constraints) {
1479       min_n = max_size_of_constraint;
1480       max_n = max_constraints;
1481     }
1482     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1483 #if defined(PETSC_USE_COMPLEX)
1484     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1485 #endif
1486     /* now we evaluate the optimal workspace using query with lwork=-1 */
1487     lwork = -1;
1488     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1489     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1490     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1491     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1492 #if !defined(PETSC_USE_COMPLEX)
1493     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1494 #else
1495     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1496 #endif
1497     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1498     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1499 #endif /* on missing GESVD */
1500     /* Allocate optimal workspace */
1501     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1502     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1503   }
1504   /* Now we can loop on constraining sets */
1505   total_counts = 0;
1506   temp_indices[0] = 0;
1507   /* vertices */
1508   if (ISForVertices) {
1509     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1510     if (nnsp_has_cnst) { /* consider all vertices */
1511       for (i=0;i<n_vertices;i++) {
1512         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1513         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1514         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1515         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1516         total_counts++;
1517       }
1518     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1519       PetscBool used_vertex;
1520       for (i=0;i<n_vertices;i++) {
1521         used_vertex = PETSC_FALSE;
1522         k = 0;
1523         while (!used_vertex && k<nnsp_size) {
1524           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1525           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1526             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1527             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1528             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1529             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1530             total_counts++;
1531             used_vertex = PETSC_TRUE;
1532           }
1533           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1534           k++;
1535         }
1536       }
1537     }
1538     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1539     n_vertices = total_counts;
1540   }
1541 
1542   /* edges and faces */
1543   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1544     if (i<n_ISForEdges) {
1545       used_IS = &ISForEdges[i];
1546       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1547     } else {
1548       used_IS = &ISForFaces[i-n_ISForEdges];
1549       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1550     }
1551     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1552     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1553     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1554     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1555     /* change of basis should not be performed on local periodic nodes */
1556     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1557     if (nnsp_has_cnst) {
1558       PetscScalar quad_value;
1559       temp_constraints++;
1560       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1561       for (j=0;j<size_of_constraint;j++) {
1562         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1563         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1564         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1565       }
1566       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1567       total_counts++;
1568     }
1569     for (k=0;k<nnsp_size;k++) {
1570       PetscReal real_value;
1571       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1572       for (j=0;j<size_of_constraint;j++) {
1573         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1574         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1575         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1576       }
1577       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1578       /* check if array is null on the connected component */
1579       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1580       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1581       if (real_value > 0.0) { /* keep indices and values */
1582         temp_constraints++;
1583         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1584         total_counts++;
1585       }
1586     }
1587     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1588     valid_constraints = temp_constraints;
1589     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1590     if (!pcbddc->use_nnsp_true && temp_constraints) {
1591       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1592 
1593 #if defined(PETSC_MISSING_LAPACK_GESVD)
1594       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1595          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1596          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1597             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1598             from that computed using LAPACKgesvd
1599          -> This is due to a different computation of eigenvectors in LAPACKheev
1600          -> The quality of the POD-computed basis will be the same */
1601       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1602       /* Store upper triangular part of correlation matrix */
1603       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1604       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1605       for (j=0;j<temp_constraints;j++) {
1606         for (k=0;k<j+1;k++) {
1607           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1608         }
1609       }
1610       /* compute eigenvalues and eigenvectors of correlation matrix */
1611       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1612       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1613 #if !defined(PETSC_USE_COMPLEX)
1614       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1615 #else
1616       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1617 #endif
1618       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1619       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1620       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1621       j=0;
1622       while (j < temp_constraints && singular_vals[j] < tol) j++;
1623       total_counts=total_counts-j;
1624       valid_constraints = temp_constraints-j;
1625       /* scale and copy POD basis into used quadrature memory */
1626       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1627       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1628       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1629       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1630       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1631       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1632       if (j<temp_constraints) {
1633         PetscInt ii;
1634         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1635         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1636         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1637         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1638         for (k=0;k<temp_constraints-j;k++) {
1639           for (ii=0;ii<size_of_constraint;ii++) {
1640             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1641           }
1642         }
1643       }
1644 #else  /* on missing GESVD */
1645       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1646       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1647       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1648       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1649 #if !defined(PETSC_USE_COMPLEX)
1650       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1651 #else
1652       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1653 #endif
1654       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1655       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1656       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1657       k = temp_constraints;
1658       if (k > size_of_constraint) k = size_of_constraint;
1659       j = 0;
1660       while (j < k && singular_vals[k-j-1] < tol) j++;
1661       total_counts = total_counts-temp_constraints+k-j;
1662       valid_constraints = k-j;
1663 #endif /* on missing GESVD */
1664     }
1665     /* setting change_of_basis flag is safe now */
1666     if (boolforchange) {
1667       for (j=0;j<valid_constraints;j++) {
1668         PetscBTSet(change_basis,total_counts-j-1);
1669       }
1670     }
1671   }
1672   /* free index sets of faces, edges and vertices */
1673   for (i=0;i<n_ISForFaces;i++) {
1674     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1675   }
1676   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1677   for (i=0;i<n_ISForEdges;i++) {
1678     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1679   }
1680   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1681   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1682 
1683   /* free workspace */
1684   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1685     ierr = PetscFree(work);CHKERRQ(ierr);
1686 #if defined(PETSC_USE_COMPLEX)
1687     ierr = PetscFree(rwork);CHKERRQ(ierr);
1688 #endif
1689     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1690 #if defined(PETSC_MISSING_LAPACK_GESVD)
1691     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1692     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1693 #endif
1694   }
1695   for (k=0;k<nnsp_size;k++) {
1696     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1697   }
1698   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1699 
1700   /* set quantities in pcbddc data structure and store previous primal size */
1701   /* n_vertices defines the number of subdomain corners in the primal space */
1702   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1703   olocal_primal_size = pcbddc->local_primal_size;
1704   pcbddc->local_primal_size = total_counts;
1705   pcbddc->n_vertices = n_vertices;
1706   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1707 
1708   /* Create constraint matrix */
1709   /* The constraint matrix is used to compute the l2g map of primal dofs */
1710   /* so we need to set it up properly either with or without change of basis */
1711   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1712   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1713   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1714   /* array to compute a local numbering of constraints : vertices first then constraints */
1715   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1716   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1717   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1718   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1719   /* auxiliary stuff for basis change */
1720   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1721   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1722 
1723   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1724   total_primal_vertices=0;
1725   for (i=0;i<pcbddc->local_primal_size;i++) {
1726     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1727     if (size_of_constraint == 1) {
1728       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1729       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1730       aux_primal_minloc[total_primal_vertices]=0;
1731       total_primal_vertices++;
1732     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1733       PetscInt min_loc,min_index;
1734       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1735       /* find first untouched local node */
1736       k = 0;
1737       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1738       min_index = global_indices[k];
1739       min_loc = k;
1740       /* search the minimum among global nodes already untouched on the cc */
1741       for (k=1;k<size_of_constraint;k++) {
1742         /* there can be more than one constraint on a single connected component */
1743         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1744           min_index = global_indices[k];
1745           min_loc = k;
1746         }
1747       }
1748       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1749       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1750       aux_primal_minloc[total_primal_vertices]=min_loc;
1751       total_primal_vertices++;
1752     }
1753   }
1754   /* determine if a QR strategy is needed for change of basis */
1755   qr_needed = PETSC_FALSE;
1756   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1757   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1758     if (PetscBTLookup(change_basis,i)) {
1759       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1760       j = 0;
1761       for (k=0;k<size_of_constraint;k++) {
1762         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1763           j++;
1764         }
1765       }
1766       /* found more than one primal dof on the cc */
1767       if (j > 1) {
1768         PetscBTSet(qr_needed_idx,i);
1769         qr_needed = PETSC_TRUE;
1770       }
1771     }
1772   }
1773   /* free workspace */
1774   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1775 
1776   /* permute indices in order to have a sorted set of vertices */
1777   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1778 
1779   /* nonzero structure of constraint matrix */
1780   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1781   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1782   j=total_primal_vertices;
1783   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1784     if (!PetscBTLookup(change_basis,i)) {
1785       nnz[j]=temp_indices[i+1]-temp_indices[i];
1786       j++;
1787     }
1788   }
1789   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1790   ierr = PetscFree(nnz);CHKERRQ(ierr);
1791   /* set values in constraint matrix */
1792   for (i=0;i<total_primal_vertices;i++) {
1793     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1794   }
1795   total_counts = total_primal_vertices;
1796   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1797     if (!PetscBTLookup(change_basis,i)) {
1798       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1799       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1800       total_counts++;
1801     }
1802   }
1803   /* assembling */
1804   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1805   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1806   /*
1807   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1808   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1809   */
1810   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1811   if (pcbddc->use_change_of_basis) {
1812     /* dual and primal dofs on a single cc */
1813     PetscInt     dual_dofs,primal_dofs;
1814     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1815     PetscInt     primal_counter;
1816     /* working stuff for GEQRF */
1817     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1818     PetscBLASInt lqr_work;
1819     /* working stuff for UNGQR */
1820     PetscScalar  *gqr_work,lgqr_work_t;
1821     PetscBLASInt lgqr_work;
1822     /* working stuff for TRTRS */
1823     PetscScalar  *trs_rhs;
1824     PetscBLASInt Blas_NRHS;
1825     /* pointers for values insertion into change of basis matrix */
1826     PetscInt     *start_rows,*start_cols;
1827     PetscScalar  *start_vals;
1828     /* working stuff for values insertion */
1829     PetscBT      is_primal;
1830 
1831     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1832     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1833     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1834     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1835     /* work arrays */
1836     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1837     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1838     /* nonzeros per row */
1839     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1840       if (PetscBTLookup(change_basis,i)) {
1841         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1842         if (PetscBTLookup(qr_needed_idx,i)) {
1843           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1844         } else {
1845           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1846           /* get local primal index on the cc */
1847           j = 0;
1848           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1849           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1850         }
1851       }
1852     }
1853     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1854     ierr = PetscFree(nnz);CHKERRQ(ierr);
1855     /* Set initial identity in the matrix */
1856     for (i=0;i<pcis->n_B;i++) {
1857       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1858     }
1859 
1860     if (pcbddc->dbg_flag) {
1861       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1862       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1863     }
1864 
1865 
1866     /* Now we loop on the constraints which need a change of basis */
1867     /*
1868        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1869        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1870 
1871        Basic blocks of change of basis matrix T computed
1872 
1873           - Using the following block transformation if there is only a primal dof on the cc
1874             (in the example, primal dof is the last one of the edge in LOCAL ordering
1875              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1876             | 1        0   ...        0     1 |
1877             | 0        1   ...        0     1 |
1878             |              ...                |
1879             | 0        ...            1     1 |
1880             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1881 
1882           - via QR decomposition of constraints otherwise
1883     */
1884     if (qr_needed) {
1885       /* space to store Q */
1886       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1887       /* first we issue queries for optimal work */
1888       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1889       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1890       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1891       lqr_work = -1;
1892       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1893       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1894       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1895       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1896       lgqr_work = -1;
1897       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1898       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1899       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1900       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1901       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1902       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1903       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1904       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1905       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1906       /* array to store scaling factors for reflectors */
1907       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1908       /* array to store rhs and solution of triangular solver */
1909       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1910       /* allocating workspace for check */
1911       if (pcbddc->dbg_flag) {
1912         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1913       }
1914     }
1915     /* array to store whether a node is primal or not */
1916     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1917     for (i=0;i<total_primal_vertices;i++) {
1918       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1919     }
1920 
1921     /* loop on constraints and see whether or not they need a change of basis and compute it */
1922     /* -> using implicit ordering contained in temp_indices data */
1923     total_counts = pcbddc->n_vertices;
1924     primal_counter = total_counts;
1925     while (total_counts<pcbddc->local_primal_size) {
1926       primal_dofs = 1;
1927       if (PetscBTLookup(change_basis,total_counts)) {
1928         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1929         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1930           primal_dofs++;
1931         }
1932         /* get constraint info */
1933         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1934         dual_dofs = size_of_constraint-primal_dofs;
1935 
1936         if (pcbddc->dbg_flag) {
1937           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1938         }
1939 
1940         if (primal_dofs > 1) { /* QR */
1941 
1942           /* copy quadrature constraints for change of basis check */
1943           if (pcbddc->dbg_flag) {
1944             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1945           }
1946           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1947           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1948 
1949           /* compute QR decomposition of constraints */
1950           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1951           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1952           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1953           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1954           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1955           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1956           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1957 
1958           /* explictly compute R^-T */
1959           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1960           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1961           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1962           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1963           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1964           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1965           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1966           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1967           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1968           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1969 
1970           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1971           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1972           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1973           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1974           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1975           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1976           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1977           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1978           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1979 
1980           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1981              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1982              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1983           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1984           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1985           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1986           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1987           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1988           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1989           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1990           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1991           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1992           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1993 
1994           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1995           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1996           /* insert cols for primal dofs */
1997           for (j=0;j<primal_dofs;j++) {
1998             start_vals = &qr_basis[j*size_of_constraint];
1999             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2000             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2001           }
2002           /* insert cols for dual dofs */
2003           for (j=0,k=0;j<dual_dofs;k++) {
2004             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2005               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2006               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2007               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2008               j++;
2009             }
2010           }
2011 
2012           /* check change of basis */
2013           if (pcbddc->dbg_flag) {
2014             PetscInt   ii,jj;
2015             PetscBool valid_qr=PETSC_TRUE;
2016             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2017             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2018             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2019             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2020             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2021             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2022             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2023             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2024             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2025             for (jj=0;jj<size_of_constraint;jj++) {
2026               for (ii=0;ii<primal_dofs;ii++) {
2027                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2028                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2029               }
2030             }
2031             if (!valid_qr) {
2032               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2033               for (jj=0;jj<size_of_constraint;jj++) {
2034                 for (ii=0;ii<primal_dofs;ii++) {
2035                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2036                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2037                   }
2038                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2039                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2040                   }
2041                 }
2042               }
2043             } else {
2044               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2045             }
2046           }
2047         } else { /* simple transformation block */
2048           PetscInt row,col;
2049           PetscScalar val;
2050           for (j=0;j<size_of_constraint;j++) {
2051             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2052             if (!PetscBTLookup(is_primal,row)) {
2053               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2054               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2055               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2056             } else {
2057               for (k=0;k<size_of_constraint;k++) {
2058                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2059                 if (row != col) {
2060                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2061                 } else {
2062                   val = 1.0;
2063                 }
2064                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2065               }
2066             }
2067           }
2068         }
2069         /* increment primal counter */
2070         primal_counter += primal_dofs;
2071       } else {
2072         if (pcbddc->dbg_flag) {
2073           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2074         }
2075       }
2076       /* increment constraint counter total_counts */
2077       total_counts += primal_dofs;
2078     }
2079 
2080     /* free workspace */
2081     if (qr_needed) {
2082       if (pcbddc->dbg_flag) {
2083         ierr = PetscFree(work);CHKERRQ(ierr);
2084       }
2085       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2086       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2087       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2088       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2089       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2090     }
2091     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2092     /* assembling */
2093     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2094     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095     /*
2096     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2097     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2098     */
2099   }
2100 
2101   /* get indices in local ordering for vertices and constraints */
2102   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2103     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2104     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2105   }
2106   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2107   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2108   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2109   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2110   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2111   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2112   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2113   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2114   /* check if a new primal space has been introduced */
2115   pcbddc->new_primal_space_local = PETSC_TRUE;
2116   if (olocal_primal_size == pcbddc->local_primal_size) {
2117     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2118     pcbddc->new_primal_space_local = !pcbddc->new_primal_space_local;
2119     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2120   }
2121   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2122   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2123 
2124   /* flush dbg viewer */
2125   if (pcbddc->dbg_flag) {
2126     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2127   }
2128 
2129   /* free workspace */
2130   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2131   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2132   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2133   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2134   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2135   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2136   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2137   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2138   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2144 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2145 {
2146   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2147   PC_IS       *pcis = (PC_IS*)pc->data;
2148   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2149   PetscInt    bs,ierr,i,vertex_size;
2150   PetscViewer viewer=pcbddc->dbg_viewer;
2151 
2152   PetscFunctionBegin;
2153   /* Reset previously computed graph */
2154   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2155   /* Init local Graph struct */
2156   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2157 
2158   /* Check validity of the csr graph passed in by the user */
2159   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2160     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2161   }
2162 
2163   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2164   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2165     Mat mat_adj;
2166     const PetscInt *xadj,*adjncy;
2167     PetscBool flg_row=PETSC_TRUE;
2168 
2169     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2170     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2171     if (!flg_row) {
2172       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2173     }
2174     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2175     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2176     if (!flg_row) {
2177       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2178     }
2179     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2180   }
2181 
2182   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2183   vertex_size = 1;
2184   if (!pcbddc->user_provided_isfordofs) {
2185     if (!pcbddc->n_ISForDofs) {
2186       IS *custom_ISForDofs;
2187 
2188       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2189       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2190       for (i=0;i<bs;i++) {
2191         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2192       }
2193       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2194       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2195       /* remove my references to IS objects */
2196       for (i=0;i<bs;i++) {
2197         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2198       }
2199       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2200     }
2201   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2202     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2203   }
2204 
2205   /* Setup of Graph */
2206   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2207 
2208   /* Graph's connected components analysis */
2209   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2210 
2211   /* print some info to stdout */
2212   if (pcbddc->dbg_flag) {
2213     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2214   }
2215 
2216   /* mark topography has done */
2217   pcbddc->recompute_topography = PETSC_FALSE;
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2223 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2224 {
2225   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2226   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2227   PetscErrorCode ierr;
2228 
2229   PetscFunctionBegin;
2230   n = 0;
2231   vertices = 0;
2232   if (pcbddc->ConstraintMatrix) {
2233     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2234     for (i=0;i<local_primal_size;i++) {
2235       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2236       if (size_of_constraint == 1) n++;
2237       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2238     }
2239     if (vertices_idx) {
2240       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2241       n = 0;
2242       for (i=0;i<local_primal_size;i++) {
2243         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2244         if (size_of_constraint == 1) {
2245           vertices[n++]=row_cmat_indices[0];
2246         }
2247         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2248       }
2249     }
2250   }
2251   *n_vertices = n;
2252   if (vertices_idx) *vertices_idx = vertices;
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2258 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2259 {
2260   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2261   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2262   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2263   PetscBT        touched;
2264   PetscErrorCode ierr;
2265 
2266     /* This function assumes that the number of local constraints per connected component
2267        is not greater than the number of nodes defined for the connected component
2268        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2269   PetscFunctionBegin;
2270   n = 0;
2271   constraints_index = 0;
2272   if (pcbddc->ConstraintMatrix) {
2273     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2274     max_size_of_constraint = 0;
2275     for (i=0;i<local_primal_size;i++) {
2276       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2277       if (size_of_constraint > 1) {
2278         n++;
2279       }
2280       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2281       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2282     }
2283     if (constraints_idx) {
2284       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2285       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2286       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2287       n = 0;
2288       for (i=0;i<local_primal_size;i++) {
2289         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2290         if (size_of_constraint > 1) {
2291           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2292           /* find first untouched local node */
2293           j = 0;
2294           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2295           min_index = row_cmat_global_indices[j];
2296           min_loc = j;
2297           /* search the minimum among nodes not yet touched on the connected component
2298              since there can be more than one constraint on a single cc */
2299           for (j=1;j<size_of_constraint;j++) {
2300             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2301               min_index = row_cmat_global_indices[j];
2302               min_loc = j;
2303             }
2304           }
2305           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2306           constraints_index[n++] = row_cmat_indices[min_loc];
2307         }
2308         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2309       }
2310       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2311       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2312     }
2313   }
2314   *n_constraints = n;
2315   if (constraints_idx) *constraints_idx = constraints_index;
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 /* the next two functions has been adapted from pcis.c */
2320 #undef __FUNCT__
2321 #define __FUNCT__ "PCBDDCApplySchur"
2322 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2323 {
2324   PetscErrorCode ierr;
2325   PC_IS          *pcis = (PC_IS*)(pc->data);
2326 
2327   PetscFunctionBegin;
2328   if (!vec2_B) { vec2_B = v; }
2329   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2330   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2331   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2332   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2333   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2339 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2340 {
2341   PetscErrorCode ierr;
2342   PC_IS          *pcis = (PC_IS*)(pc->data);
2343 
2344   PetscFunctionBegin;
2345   if (!vec2_B) { vec2_B = v; }
2346   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2347   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2348   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2349   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2350   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "PCBDDCSubsetNumbering"
2356 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2357 {
2358   Vec            local_vec,global_vec;
2359   IS             seqis,paris;
2360   VecScatter     scatter_ctx;
2361   PetscScalar    *array;
2362   PetscInt       *temp_global_dofs;
2363   PetscScalar    globalsum;
2364   PetscInt       i,j,s;
2365   PetscInt       nlocals,first_index,old_index,max_local;
2366   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2367   PetscMPIInt    *dof_sizes,*dof_displs;
2368   PetscBool      first_found;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   /* mpi buffers */
2373   MPI_Comm_size(comm,&size_prec_comm);
2374   MPI_Comm_rank(comm,&rank_prec_comm);
2375   j = ( !rank_prec_comm ? size_prec_comm : 0);
2376   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2377   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2378   /* get maximum size of subset */
2379   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2380   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2381   max_local = 0;
2382   if (n_local_dofs) {
2383     max_local = temp_global_dofs[0];
2384     for (i=1;i<n_local_dofs;i++) {
2385       if (max_local < temp_global_dofs[i] ) {
2386         max_local = temp_global_dofs[i];
2387       }
2388     }
2389   }
2390   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2391   max_global++;
2392   max_local = 0;
2393   if (n_local_dofs) {
2394     max_local = local_dofs[0];
2395     for (i=1;i<n_local_dofs;i++) {
2396       if (max_local < local_dofs[i] ) {
2397         max_local = local_dofs[i];
2398       }
2399     }
2400   }
2401   max_local++;
2402   /* allocate workspace */
2403   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2404   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2405   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2406   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2407   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2408   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2409   /* create scatter */
2410   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2411   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2412   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2413   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2414   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2415   /* init array */
2416   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2417   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2418   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2419   if (local_dofs_mult) {
2420     for (i=0;i<n_local_dofs;i++) {
2421       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2422     }
2423   } else {
2424     for (i=0;i<n_local_dofs;i++) {
2425       array[local_dofs[i]]=1.0;
2426     }
2427   }
2428   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2429   /* scatter into global vec and get total number of global dofs */
2430   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2431   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2432   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2433   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2434   /* Fill global_vec with cumulative function for global numbering */
2435   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2436   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2437   nlocals = 0;
2438   first_index = -1;
2439   first_found = PETSC_FALSE;
2440   for (i=0;i<s;i++) {
2441     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2442       first_found = PETSC_TRUE;
2443       first_index = i;
2444     }
2445     nlocals += (PetscInt)PetscRealPart(array[i]);
2446   }
2447   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2448   if (!rank_prec_comm) {
2449     dof_displs[0]=0;
2450     for (i=1;i<size_prec_comm;i++) {
2451       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2452     }
2453   }
2454   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2455   if (first_found) {
2456     array[first_index] += (PetscScalar)nlocals;
2457     old_index = first_index;
2458     for (i=first_index+1;i<s;i++) {
2459       if (PetscRealPart(array[i]) > 0.0) {
2460         array[i] += array[old_index];
2461         old_index = i;
2462       }
2463     }
2464   }
2465   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2466   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2467   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2468   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2469   /* get global ordering of local dofs */
2470   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2471   if (local_dofs_mult) {
2472     for (i=0;i<n_local_dofs;i++) {
2473       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2474     }
2475   } else {
2476     for (i=0;i<n_local_dofs;i++) {
2477       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2478     }
2479   }
2480   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2481   /* free workspace */
2482   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2483   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2484   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2485   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2486   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2487   /* return pointer to global ordering of local dofs */
2488   *global_numbering_subset = temp_global_dofs;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2494 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2495 {
2496   PetscInt       i,j;
2497   PetscScalar    *alphas;
2498   PetscErrorCode ierr;
2499 
2500   PetscFunctionBegin;
2501   /* this implements stabilized Gram-Schmidt */
2502   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2503   for (i=0;i<n;i++) {
2504     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2505     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2506     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2507   }
2508   ierr = PetscFree(alphas);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 /* TODO
2513    - now preallocation is done assuming SEQDENSE local matrices
2514 */
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatISGetMPIXAIJ"
2517 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2518 {
2519   Mat                    new_mat;
2520   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2521   /* info on mat */
2522   /* ISLocalToGlobalMapping rmapping,cmapping; */
2523   PetscInt               bs,rows,cols;
2524   PetscInt               lrows,lcols;
2525   PetscInt               local_rows,local_cols;
2526   PetscBool              isdense;
2527   /* values insertion */
2528   PetscScalar            *array;
2529   PetscInt               *local_indices,*global_indices;
2530   /* work */
2531   PetscInt               i,j,index_row;
2532   PetscErrorCode         ierr;
2533 
2534   PetscFunctionBegin;
2535   /* MISSING CHECKS
2536     - rectangular case not covered (it is not allowed by MATIS)
2537   */
2538   /* get info from mat */
2539   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2540   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2541   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2542   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2543 
2544   /* work */
2545   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2546   for (i=0;i<local_rows;i++) local_indices[i]=i;
2547   /* map indices of local mat to global values */
2548   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2549   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2550   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2551 
2552   if (reuse==MAT_INITIAL_MATRIX) {
2553     Vec         vec_dnz,vec_onz;
2554     PetscScalar *my_dnz,*my_onz;
2555     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2556     PetscInt    index_col,owner;
2557     PetscMPIInt nsubdomains;
2558 
2559     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2560     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2561     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2562     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2563     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2564     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2565     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2566 
2567     /*
2568       preallocation
2569     */
2570 
2571     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2572     /*
2573        Some vectors are needed to sum up properly on shared interface dofs.
2574        Preallocation macros cannot do the job.
2575        Note that preallocation is not exact, since it overestimates nonzeros
2576     */
2577     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2578     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2579     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2580     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2581     /* All processes need to compute entire row ownership */
2582     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2583     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2584     for (i=0;i<nsubdomains;i++) {
2585       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2586         row_ownership[j]=i;
2587       }
2588     }
2589 
2590     /*
2591        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2592        then, they will be summed up properly. This way, preallocation is always sufficient
2593     */
2594     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2595     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2596     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2597     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2598     for (i=0;i<local_rows;i++) {
2599       index_row = global_indices[i];
2600       for (j=i;j<local_rows;j++) {
2601         owner = row_ownership[index_row];
2602         index_col = global_indices[j];
2603         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2604           my_dnz[i] += 1.0;
2605         } else { /* offdiag block */
2606           my_onz[i] += 1.0;
2607         }
2608         /* same as before, interchanging rows and cols */
2609         if (i != j) {
2610           owner = row_ownership[index_col];
2611           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2612             my_dnz[j] += 1.0;
2613           } else {
2614             my_onz[j] += 1.0;
2615           }
2616         }
2617       }
2618     }
2619     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2620     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2621     if (local_rows) { /* multilevel guard */
2622       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2623       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2624     }
2625     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2626     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2627     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2628     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2629     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2630     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2631     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2632 
2633     /* set computed preallocation in dnz and onz */
2634     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2635     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2636     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2637     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2638     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2639     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2640     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2641     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2642 
2643     /* Resize preallocation if overestimated */
2644     for (i=0;i<lrows;i++) {
2645       dnz[i] = PetscMin(dnz[i],lcols);
2646       onz[i] = PetscMin(onz[i],cols-lcols);
2647     }
2648     /* set preallocation */
2649     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2650     for (i=0;i<lrows/bs;i++) {
2651       dnz[i] = dnz[i*bs]/bs;
2652       onz[i] = onz[i*bs]/bs;
2653     }
2654     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2655     for (i=0;i<lrows/bs;i++) {
2656       dnz[i] = dnz[i]-i;
2657     }
2658     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2659     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2660     *M = new_mat;
2661   } else {
2662     PetscInt mbs,mrows,mcols;
2663     /* some checks */
2664     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2665     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2666     if (mrows != rows) {
2667       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2668     }
2669     if (mrows != rows) {
2670       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2671     }
2672     if (mbs != bs) {
2673       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2674     }
2675     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2676   }
2677   /* set local to global mappings */
2678   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2679   /* Set values */
2680   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2681   if (isdense) { /* special case for dense local matrices */
2682     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2683     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2684     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2685     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2686     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2687     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2688   } else { /* very basic values insertion for all other matrix types */
2689     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2690     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2691     for (i=0;i<local_rows;i++) {
2692       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2693       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2694       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2695       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2696       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2697       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2698     }
2699   }
2700   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2701   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2702   if (isdense) {
2703     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2704   }
2705   PetscFunctionReturn(0);
2706 }
2707 
2708 #undef __FUNCT__
2709 #define __FUNCT__ "MatISSubassemble_Private"
2710 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2711 {
2712   Mat             subdomain_adj;
2713   IS              new_ranks,ranks_send_to;
2714   MatPartitioning partitioner;
2715   Mat_IS          *matis;
2716   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2717   PetscInt        prank;
2718   PetscMPIInt     size,rank,color;
2719   PetscInt        *xadj,*adjncy,*oldranks;
2720   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2721   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2722   PetscErrorCode  ierr;
2723   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2724   PetscSubcomm    subcomm;
2725 
2726   PetscFunctionBegin;
2727   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2728   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2729   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2730 
2731   /* Get info on mapping */
2732   matis = (Mat_IS*)(mat->data);
2733   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2734   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2735 
2736   /* build local CSR graph of subdomains' connectivity */
2737   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2738   xadj[0] = 0;
2739   xadj[1] = PetscMax(n_neighs-1,0);
2740   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2741   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2742 
2743   if (threshold) {
2744     PetscInt* count,min_threshold;
2745     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2746     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2747     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2748       for (j=0;j<n_shared[i];j++) {
2749         count[shared[i][j]] += 1;
2750       }
2751     }
2752     /* adapt threshold since we dont want to lose significant connections */
2753     min_threshold = n_neighs;
2754     for (i=1;i<n_neighs;i++) {
2755       for (j=0;j<n_shared[i];j++) {
2756         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2757       }
2758     }
2759     threshold = PetscMax(min_threshold+1,threshold);
2760     xadj[1] = 0;
2761     for (i=1;i<n_neighs;i++) {
2762       for (j=0;j<n_shared[i];j++) {
2763         if (count[shared[i][j]] < threshold) {
2764           adjncy[xadj[1]] = neighs[i];
2765           adjncy_wgt[xadj[1]] = n_shared[i];
2766           xadj[1]++;
2767           break;
2768         }
2769       }
2770     }
2771     ierr = PetscFree(count);CHKERRQ(ierr);
2772   } else {
2773     if (xadj[1]) {
2774       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2775       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2776     }
2777   }
2778   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2779   if (use_square) {
2780     for (i=0;i<xadj[1];i++) {
2781       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2782     }
2783   }
2784   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2785 
2786   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2787 
2788   /*
2789     Restrict work on active processes only.
2790   */
2791   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2792   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2793   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2794   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2795   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2796   if (color) {
2797     ierr = PetscFree(xadj);CHKERRQ(ierr);
2798     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2799     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2800   } else {
2801     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2802     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2803     prank = rank;
2804     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2805     for (i=0;i<size;i++) {
2806       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2807     }
2808     for (i=0;i<xadj[1];i++) {
2809       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2810     }
2811     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2812     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2813     n_subdomains = (PetscInt)size;
2814     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2815 
2816     /* Partition */
2817     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2818     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2819     if (use_vwgt) {
2820       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2821       v_wgt[0] = local_size;
2822       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2823     }
2824     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2825     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2826     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2827     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2828     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2829 
2830     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2831     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2832     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2833     /* clean up */
2834     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2835     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2836     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2837     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2838   }
2839   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2840 
2841   /* assemble parallel IS for sends */
2842   i = 1;
2843   if (color) i=0;
2844   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2845   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2846 
2847   /* get back IS */
2848   *is_sends = ranks_send_to;
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatISSubassemble"
2856 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2857 {
2858   Mat                    local_mat,new_mat;
2859   Mat_IS                 *matis;
2860   IS                     is_sends_internal;
2861   PetscInt               rows,cols;
2862   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2863   PetscBool              ismatis,isdense;
2864   ISLocalToGlobalMapping l2gmap;
2865   PetscInt*              l2gmap_indices;
2866   const PetscInt*        is_indices;
2867   MatType                new_local_type;
2868   MatTypePrivate         new_local_type_private;
2869   /* buffers */
2870   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2871   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2872   /* MPI */
2873   MPI_Comm               comm;
2874   PetscMPIInt            n_sends,n_recvs,commsize;
2875   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2876   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2877   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2878   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2879   PetscErrorCode         ierr;
2880 
2881   PetscFunctionBegin;
2882   /* checks */
2883   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2884   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2885   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2886   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2887   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2888   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2889   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2890   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2891   PetscValidLogicalCollectiveInt(mat,bs,0);
2892   /* prepare IS for sending if not provided */
2893   if (!is_sends) {
2894     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2895     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2896   } else {
2897     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2898     is_sends_internal = is_sends;
2899   }
2900 
2901   /* get pointer of MATIS data */
2902   matis = (Mat_IS*)mat->data;
2903 
2904   /* get comm */
2905   comm = PetscObjectComm((PetscObject)mat);
2906 
2907   /* compute number of sends */
2908   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2909   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2910 
2911   /* compute number of receives */
2912   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2913   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2914   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2915   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2916   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2917   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2918   ierr = PetscFree(iflags);CHKERRQ(ierr);
2919 
2920   /* prepare send/receive buffers */
2921   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2922   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2923   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2924   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2925 
2926   /* Get data from local mat */
2927   if (!isdense) {
2928     /* TODO: See below some guidelines on how to prepare the local buffers */
2929     /*
2930        send_buffer_vals should contain the raw values of the local matrix
2931        send_buffer_idxs should contain:
2932        - MatType_PRIVATE type
2933        - PetscInt        size_of_l2gmap
2934        - PetscInt        global_row_indices[size_of_l2gmap]
2935        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2936     */
2937   } else {
2938     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2939     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2940     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2941     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2942     send_buffer_idxs[1] = i;
2943     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2944     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2945     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2946     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2947     for (i=0;i<n_sends;i++) {
2948       ilengths_vals[is_indices[i]] = len*len;
2949       ilengths_idxs[is_indices[i]] = len+2;
2950     }
2951   }
2952   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2953   buf_size_idxs = 0;
2954   buf_size_vals = 0;
2955   for (i=0;i<n_recvs;i++) {
2956     buf_size_idxs += (PetscInt)olengths_idxs[i];
2957     buf_size_vals += (PetscInt)olengths_vals[i];
2958   }
2959   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2960   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2961 
2962   /* get new tags for clean communications */
2963   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2964   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2965 
2966   /* allocate for requests */
2967   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2968   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2969   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2970   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2971 
2972   /* communications */
2973   ptr_idxs = recv_buffer_idxs;
2974   ptr_vals = recv_buffer_vals;
2975   for (i=0;i<n_recvs;i++) {
2976     source_dest = onodes[i];
2977     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2978     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2979     ptr_idxs += olengths_idxs[i];
2980     ptr_vals += olengths_vals[i];
2981   }
2982   for (i=0;i<n_sends;i++) {
2983     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2984     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2985     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2986   }
2987   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2988   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2989 
2990   /* assemble new l2g map */
2991   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2992   ptr_idxs = recv_buffer_idxs;
2993   buf_size_idxs = 0;
2994   for (i=0;i<n_recvs;i++) {
2995     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2996     ptr_idxs += olengths_idxs[i];
2997   }
2998   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2999   ptr_idxs = recv_buffer_idxs;
3000   buf_size_idxs = 0;
3001   for (i=0;i<n_recvs;i++) {
3002     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3003     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3004     ptr_idxs += olengths_idxs[i];
3005   }
3006   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3007   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3008   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3009 
3010   /* infer new local matrix type from received local matrices type */
3011   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3012   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3013   new_local_type_private = MATAIJ_PRIVATE;
3014   new_local_type = MATSEQAIJ;
3015   if (n_recvs) {
3016     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3017     ptr_idxs = recv_buffer_idxs;
3018     for (i=0;i<n_recvs;i++) {
3019       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3020         new_local_type_private = MATAIJ_PRIVATE;
3021         break;
3022       }
3023       ptr_idxs += olengths_idxs[i];
3024     }
3025     switch (new_local_type_private) {
3026       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3027         new_local_type = MATSEQAIJ;
3028         bs = 1;
3029         break;
3030       case MATAIJ_PRIVATE:
3031         new_local_type = MATSEQAIJ;
3032         bs = 1;
3033         break;
3034       case MATBAIJ_PRIVATE:
3035         new_local_type = MATSEQBAIJ;
3036         break;
3037       case MATSBAIJ_PRIVATE:
3038         new_local_type = MATSEQSBAIJ;
3039         break;
3040       default:
3041         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3042         break;
3043     }
3044   }
3045 
3046   /* create MATIS object */
3047   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3048   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
3049   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3050   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
3051   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3052   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3053 
3054   /* set values */
3055   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3056   ptr_vals = recv_buffer_vals;
3057   ptr_idxs = recv_buffer_idxs;
3058   for (i=0;i<n_recvs;i++) {
3059     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3060       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3061       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3062       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3063       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3064       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3065     }
3066     ptr_idxs += olengths_idxs[i];
3067     ptr_vals += olengths_vals[i];
3068   }
3069   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3070   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3071   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3072   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073 
3074   { /* check */
3075     Vec       lvec,rvec;
3076     PetscReal infty_error;
3077 
3078     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3079     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3080     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3081     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3082     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3083     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3084     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3085     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3086     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3087   }
3088 
3089   /* free workspace */
3090   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3091   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3092   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3093   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3094   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3095   if (isdense) {
3096     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3097     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3098   } else {
3099     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3100   }
3101   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3102   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3103   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3104   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3105   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3106   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3107   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3108   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3109   ierr = PetscFree(onodes);CHKERRQ(ierr);
3110   /* get back new mat */
3111   *mat_n = new_mat;
3112   PetscFunctionReturn(0);
3113 }
3114 
3115 #undef __FUNCT__
3116 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3117 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3118 {
3119   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3120   PC_IS                  *pcis = (PC_IS*)pc->data;
3121   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3122   MatNullSpace           CoarseNullSpace=NULL;
3123   ISLocalToGlobalMapping coarse_islg;
3124   IS                     coarse_is;
3125   PetscInt               max_it;
3126   PetscInt               im_active=-1,active_procs=-1;
3127   PC                     pc_temp;
3128   PCType                 coarse_pc_type;
3129   KSPType                coarse_ksp_type;
3130   PetscBool              multilevel_requested,multilevel_allowed;
3131   PetscBool              setsym,issym,isbddc,isnn,coarse_reuse;
3132   MatStructure           matstruct;
3133   PetscErrorCode         ierr;
3134 
3135   PetscFunctionBegin;
3136   /* Assign global numbering to coarse dofs */
3137   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3138     PetscInt ocoarse_size;
3139     ocoarse_size = pcbddc->coarse_size;
3140     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3141     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3142     /* see if we can avoid some work */
3143     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3144       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3145         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3146         coarse_reuse = PETSC_FALSE;
3147       } else { /* we can safely reuse already computed coarse matrix */
3148         coarse_reuse = PETSC_TRUE;
3149       }
3150     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3151       coarse_reuse = PETSC_FALSE;
3152     }
3153   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3154     coarse_reuse = PETSC_TRUE;
3155   }
3156 
3157   /* infer some info from user */
3158   issym = PETSC_FALSE;
3159   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3160   multilevel_allowed = PETSC_FALSE;
3161   multilevel_requested = PETSC_FALSE;
3162   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3163   if (multilevel_requested) {
3164     /* count "active processes" */
3165     im_active = !!(pcis->n);
3166     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3167     if (active_procs/pcbddc->coarsening_ratio < 2) {
3168       multilevel_allowed = PETSC_FALSE;
3169     } else {
3170       multilevel_allowed = PETSC_TRUE;
3171     }
3172   }
3173 
3174   /* set defaults for coarse KSP and PC */
3175   if (multilevel_allowed) {
3176     if (issym) {
3177       coarse_ksp_type = KSPRICHARDSON;
3178     } else {
3179       coarse_ksp_type = KSPCHEBYSHEV;
3180     }
3181     coarse_pc_type = PCBDDC;
3182   } else {
3183     coarse_ksp_type = KSPPREONLY;
3184     coarse_pc_type = PCREDUNDANT;
3185   }
3186 
3187   /* create the coarse KSP object only once with defaults */
3188   if (!pcbddc->coarse_ksp) {
3189     char prefix[256],str_level[3];
3190     size_t len;
3191     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3192     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3193     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3194     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3195     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3196     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3197     /* prefix */
3198     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3199     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3200     if (!pcbddc->current_level) {
3201       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3202       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3203     } else {
3204       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3205       if (pcbddc->current_level>1) len -= 2;
3206       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3207       *(prefix+len)='\0';
3208       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3209       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3210     }
3211     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3212   }
3213   /* allow user customization */
3214   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3215   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3216   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3217 
3218   /* get some info after set from options */
3219   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3220   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3221   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3222   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3223     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3224     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3225     isbddc = PETSC_FALSE;
3226   }
3227 
3228   /* propagate BDDC info to the next level */
3229   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3230   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3231   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3232 
3233   /* creates temporary MATIS object for coarse matrix */
3234   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3235   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3236   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3237   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3238   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3239   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3240   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3241   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3242   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3243 
3244   /* assemble coarse matrix */
3245   if (isbddc || isnn) {
3246     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3247   } else {
3248     if (coarse_reuse) {
3249       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3250       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3251       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3252     } else {
3253       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3254     }
3255   }
3256   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3257 
3258   /* create local to global scatters for coarse problem */
3259   if (pcbddc->new_primal_space) {
3260     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3261     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3262     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3263     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3264     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3265   }
3266   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3267 
3268   /* propagate symmetry info to coarse matrix */
3269   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3270 
3271   /* Compute coarse null space (special handling by BDDC only) */
3272   if (pcbddc->NullSpace) {
3273     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3274     if (isbddc) {
3275       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3276     } else {
3277       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3278     }
3279   }
3280 
3281   /* set operators */
3282   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3283   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3284 
3285   /* additional KSP customization */
3286   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3287   if (max_it < 5) {
3288     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3289   }
3290   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3291 
3292 
3293   /* print some info if requested */
3294   if (pcbddc->dbg_flag) {
3295     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3296     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3297     if (!multilevel_allowed) {
3298       if (multilevel_requested) {
3299         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3300       } else if (pcbddc->max_levels) {
3301         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3302       }
3303     }
3304     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3305     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3306   }
3307 
3308   /* setup coarse ksp */
3309   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3310   if (pcbddc->dbg_flag) {
3311     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3312     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3313     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3315   }
3316 
3317   /* Check coarse problem if requested */
3318   if (pcbddc->dbg_flag) {
3319     KSP       check_ksp;
3320     KSPType   check_ksp_type;
3321     PC        check_pc;
3322     Vec       check_vec;
3323     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3324     PetscInt  its;
3325     PetscBool ispreonly,compute;
3326 
3327     /* Create ksp object suitable for estimation of extreme eigenvalues */
3328     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3329     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3330     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3331     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3332     if (ispreonly) {
3333       check_ksp_type = KSPPREONLY;
3334       compute = PETSC_FALSE;
3335     } else {
3336       if (issym) check_ksp_type = KSPCG;
3337       else check_ksp_type = KSPGMRES;
3338       compute = PETSC_TRUE;
3339     }
3340     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3341     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3342     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3343     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3344     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3345     /* create random vec */
3346     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3347     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3348     if (CoarseNullSpace) {
3349       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3350     }
3351     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3352     /* solve coarse problem */
3353     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3354     if (CoarseNullSpace) {
3355       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3356     }
3357     /* check coarse problem residual error */
3358     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3359     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3360     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3361     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3362     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3363     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3364     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3365     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3366     /* get eigenvalue estimation if preonly has not been requested */
3367     if (!ispreonly) {
3368       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3369       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3370       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3371     }
3372     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3373     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3374   }
3375   /* free memory */
3376   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3377   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3378   PetscFunctionReturn(0);
3379 }
3380 
3381 #undef __FUNCT__
3382 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3383 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3384 {
3385   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3386   PC_IS*         pcis = (PC_IS*)pc->data;
3387   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3388   PetscInt       i,coarse_size;
3389   PetscInt       *local_primal_indices;
3390   PetscErrorCode ierr;
3391 
3392   PetscFunctionBegin;
3393   /* Compute global number of coarse dofs */
3394   if (!pcbddc->primal_indices_local_idxs) {
3395     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
3396   }
3397   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3398 
3399   /* check numbering */
3400   if (pcbddc->dbg_flag) {
3401     PetscScalar coarsesum,*array;
3402 
3403     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3404     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3405     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3406     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3407     for (i=0;i<pcbddc->local_primal_size;i++) {
3408       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3409     }
3410     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3411     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3412     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3413     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3414     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3415     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3416     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3417     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3418     for (i=0;i<pcis->n;i++) {
3419       if (array[i] == 1.0) {
3420         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3421       }
3422     }
3423     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3424     for (i=0;i<pcis->n;i++) {
3425       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3426     }
3427     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3428     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3429     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3430     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3431     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3432     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3433     if (pcbddc->dbg_flag > 1) {
3434       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3435       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3436       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3437       for (i=0;i<pcbddc->local_primal_size;i++) {
3438         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3439       }
3440       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3441     }
3442     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3443   }
3444   /* get back data */
3445   *coarse_size_n = coarse_size;
3446   *local_primal_indices_n = local_primal_indices;
3447   PetscFunctionReturn(0);
3448 }
3449 
3450