xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 727cdba63cef3c72c66ef27a9d74c90cecc5d6d6)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
23 
24   /* Change global null space passed in by the user if change of basis has been requested */
25   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
26     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
27   }
28 
29   /*
30      Setup local correction and local part of coarse basis.
31      Gives back the dense local part of the coarse matrix in column major ordering
32   */
33   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
34 
35   /* Compute total number of coarse nodes and setup coarse solver */
36   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
37 
38   /* free */
39   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "PCBDDCResetCustomization"
45 PetscErrorCode PCBDDCResetCustomization(PC pc)
46 {
47   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
48   PetscInt       i;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
57   for (i=0;i<pcbddc->n_ISForDofs;i++) {
58     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
59   }
60   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
61   pcbddc->n_ISForDofs = 0;
62   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
63   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
117 {
118   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
119   PC_IS          *pcis = (PC_IS*)pc->data;
120   VecType        impVecType;
121   PetscInt       n_vertices,n_constraints,local_primal_size,n_R,old_size;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   if (!pcbddc->ConstraintMatrix) {
126     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
127   }
128   /* get sizes */
129   ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr);
130   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
131   n_constraints = local_primal_size - n_vertices;
132   n_R = pcis->n-n_vertices;
133   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
134   /* local work vectors (try to avoid unneeded work)*/
135   /* R nodes */
136   old_size = -1;
137   if (pcbddc->vec1_R) {
138     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
139   }
140   if (n_R != old_size) {
141     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
143     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
144     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
145     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
146     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
147   }
148   /* local primal dofs */
149   old_size = -1;
150   if (pcbddc->vec1_P) {
151     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
152   }
153   if (local_primal_size != old_size) {
154     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
157     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
158   }
159   /* local explicit constraints */
160   old_size = -1;
161   if (pcbddc->vec1_C) {
162     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
163   }
164   if (n_constraints && n_constraints != old_size) {
165     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
168     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "PCBDDCSetUpCorrection"
175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176 {
177   PetscErrorCode         ierr;
178   /* pointers to pcis and pcbddc */
179   PC_IS*                 pcis = (PC_IS*)pc->data;
180   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
181   /* submatrices of local problem */
182   Mat                    A_RV,A_VR,A_VV;
183   /* working matrices */
184   Mat                    M1,M2,M3,C_CR;
185   /* working vectors */
186   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
187   /* additional working stuff */
188   IS                     is_aux;
189   ISLocalToGlobalMapping BtoNmap;
190   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
191   const PetscScalar      *array,*row_cmat_values;
192   const PetscInt         *row_cmat_indices,*idx_R_local;
193   PetscInt               *vertices,*idx_V_B,*auxindices;
194   PetscInt               n_vertices,n_constraints,size_of_constraint;
195   PetscInt               i,j,n_R,n_D,n_B;
196   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
197   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
198   MatType                impMatType;
199   /* some shortcuts to scalars */
200   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
201   /* for debugging purposes */
202   PetscReal              *coarsefunctions_errors,*constraints_errors;
203 
204   PetscFunctionBegin;
205   /* get number of vertices and their local indices */
206   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
207   n_constraints = pcbddc->local_primal_size-n_vertices;
208   /* Set Non-overlapping dimensions */
209   n_B = pcis->n_B; n_D = pcis->n - n_B;
210   n_R = pcis->n-n_vertices;
211 
212   /* Set types for local objects needed by BDDC precondtioner */
213   impMatType = MATSEQDENSE;
214 
215   /* Allocating some extra storage just to be safe */
216   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
217   for (i=0;i<pcis->n;i++) auxindices[i]=i;
218 
219   /* vertices in boundary numbering */
220   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
221   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
222   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
223   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
224   if (i != n_vertices) {
225     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
226   }
227 
228   /* Precompute stuffs needed for preprocessing and application of BDDC*/
229   if (n_constraints) {
230     /* see if we can save some allocations */
231     if (pcbddc->local_auxmat2) {
232       PetscInt on_R,on_constraints;
233       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
234       if (on_R != n_R || on_constraints != n_constraints) {
235         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
236         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
237       }
238     }
239     /* work vectors */
240     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
241     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
242     /* auxiliary matrices */
243     if (!pcbddc->local_auxmat2) {
244       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
245       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
246       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
247       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
248     }
249 
250     /* Extract constraints on R nodes: C_{CR}  */
251     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
252     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
253     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
254 
255     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
256     for (i=0;i<n_constraints;i++) {
257       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
258       /* Get row of constraint matrix in R numbering */
259       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
260       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
261       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
262       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
263       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
264       /* Solve for row of constraint matrix in R numbering */
265       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
266       /* Set values in local_auxmat2 */
267       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
268       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
269       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
270     }
271     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
274 
275     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
276     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
277     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
278     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
279     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
280     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
281     ierr = MatSetUp(M1);CHKERRQ(ierr);
282     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
283     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
284     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
285     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
286     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
287     ierr = MatDestroy(&M2);CHKERRQ(ierr);
288     ierr = MatDestroy(&M3);CHKERRQ(ierr);
289     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
290     if (!pcbddc->local_auxmat1) {
291       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
292     } else {
293       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
294     }
295   }
296 
297   /* Get submatrices from subdomain matrix */
298   if (n_vertices) {
299     PetscInt ibs,mbs;
300     PetscBool issbaij;
301     Mat newmat;
302 
303     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
304     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
305     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
306     if (ibs != mbs) { /* need to convert to SEQAIJ */
307       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
311       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
312     } else {
313       /* this is safe */
314       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
315       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
316       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
317         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
318         /* which of the two approaches is faster? */
319         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
320         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
321         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
322         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
323         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
324       } else {
325         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
326         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
327       }
328     }
329     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
330     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
331     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
332   }
333 
334   /* Matrix of coarse basis functions (local) */
335   if (pcbddc->coarse_phi_B) {
336     PetscInt on_B,on_primal;
337     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
338     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
339       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
340       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
341     }
342   }
343   if (pcbddc->coarse_phi_D) {
344     PetscInt on_D,on_primal;
345     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
346     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
347       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
348       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
349     }
350   }
351   if (!pcbddc->coarse_phi_B) {
352     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
353     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
354     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
355     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
356   }
357   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
358     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
359     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
360     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
361     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
362   }
363 
364   if (pcbddc->dbg_flag) {
365     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
366     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
367     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
368   }
369   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
370   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
371 
372   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
373 
374   /* vertices */
375   for (i=0;i<n_vertices;i++) {
376     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
377     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
378     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
379     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
380     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
381     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
382     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
383     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
384     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
385     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
386     if (n_constraints) {
387       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
388       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
389       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
390     }
391     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
392     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
393 
394     /* Set values in coarse basis function and subdomain part of coarse_mat */
395     /* coarse basis functions */
396     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
397     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
399     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
400     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
401     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
402     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
403     if (pcbddc->switch_static || pcbddc->dbg_flag) {
404       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
405       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
408       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
409     }
410     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
411     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
412     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
414     if (n_constraints) {
415       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
416       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
417       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
418     }
419 
420     /* check */
421     if (pcbddc->dbg_flag) {
422       /* assemble subdomain vector on local nodes */
423       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
424       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
427       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
430       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
431       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
432       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
433       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
434       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
435       if (n_constraints) {
436         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
437         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
438         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
439       }
440       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
443       /* check saddle point solution */
444       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
447       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
448       /* shift by the identity matrix */
449       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
450       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
453     }
454   }
455 
456   /* constraints */
457   for (i=0;i<n_constraints;i++) {
458     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
459     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
460     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
461     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
462     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
463     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
464     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
465     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
466     if (n_vertices) {
467       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
468     }
469     /* Set values in coarse basis function and subdomain part of coarse_mat */
470     /* coarse basis functions */
471     j = i+n_vertices; /* don't touch this! */
472     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
473     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
476     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
477     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
478     if (pcbddc->switch_static || pcbddc->dbg_flag) {
479       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
481       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
482       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
483       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
484     }
485     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
486     if (n_vertices) {
487       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
488       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
489       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
490     }
491     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
492     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
493     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
494 
495     if (pcbddc->dbg_flag) {
496       /* assemble subdomain vector on nodes */
497       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
498       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
499       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
500       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
501       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
502       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
503       /* assemble subdomain vector of lagrange multipliers */
504       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
505       if (n_vertices) {
506         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
507         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
509       }
510       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
511       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
513       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
514       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
515       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
516       /* check saddle point solution */
517       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
518       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
519       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
520       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
521       /* shift by the identity matrix */
522       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
523       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
524       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
525       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
526     }
527   }
528   /* call assembling routines for local coarse basis */
529   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   if (pcbddc->switch_static || pcbddc->dbg_flag) {
532     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534   }
535 
536   /* compute other basis functions for non-symmetric problems */
537   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
538   if (!setsym || (setsym && !issym)) {
539     if (!pcbddc->coarse_psi_B) {
540       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
541       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
542       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
543       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
544     }
545     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
546       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
547       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
548       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
549       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
550     }
551     for (i=0;i<pcbddc->local_primal_size;i++) {
552       if (n_constraints) {
553         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
554         for (j=0;j<n_constraints;j++) {
555           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
556         }
557         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
558         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
559       }
560       if (i<n_vertices) {
561         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
562         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
563         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
564         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
565         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
566         if (n_constraints) {
567           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
568         }
569       } else {
570         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
571       }
572       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
573       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
574       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
575       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
576       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
577       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
578       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
579       if (i<n_vertices) {
580         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
581       }
582       if (pcbddc->switch_static || pcbddc->dbg_flag) {
583         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
584         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
585         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
586         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
587         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
588       }
589 
590       if (pcbddc->dbg_flag) {
591         /* assemble subdomain vector on nodes */
592         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
593         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
594         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
595         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
596         if (i<n_vertices) {
597           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
598         }
599         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
600         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
601         /* assemble subdomain vector of lagrange multipliers */
602         for (j=0;j<pcbddc->local_primal_size;j++) {
603           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
604         }
605         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
606         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
607         /* check saddle point solution */
608         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
609         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
610         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
611         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
612         /* shift by the identity matrix */
613         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
614         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
615         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
616         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
617       }
618     }
619     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     if (pcbddc->switch_static || pcbddc->dbg_flag) {
622       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
623       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
624     }
625   }
626   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
627   /* Checking coarse_sub_mat and coarse basis functios */
628   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
629   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
630   if (pcbddc->dbg_flag) {
631     Mat         coarse_sub_mat;
632     Mat         AUXMAT,TM1,TM2,TM3,TM4;
633     Mat         coarse_phi_D,coarse_phi_B;
634     Mat         coarse_psi_D,coarse_psi_B;
635     Mat         A_II,A_BB,A_IB,A_BI;
636     MatType     checkmattype=MATSEQAIJ;
637     PetscReal   real_value;
638 
639     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
640     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
641     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
642     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
643     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
644     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
645     if (pcbddc->coarse_psi_B) {
646       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
647       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
648     }
649     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
650 
651     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
652     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
653     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
654     if (pcbddc->coarse_psi_B) {
655       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
656       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
657       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
658       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
659       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
660       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
661       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
662       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
663       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
664       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
665       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
666       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
667     } else {
668       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
669       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
670       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
671       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
672       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
673       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
674       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
675       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
676     }
677     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
679     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
680     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
681     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
682     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
684     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
685     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
686     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
687     for (i=0;i<pcbddc->local_primal_size;i++) {
688       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
689     }
690     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
691     for (i=0;i<pcbddc->local_primal_size;i++) {
692       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
693     }
694     if (pcbddc->coarse_psi_B) {
695       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
696       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
697         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
698       }
699       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
700       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
701         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
702       }
703     }
704     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
706     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
707     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
708     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
710     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
711     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
712     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
713     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
714     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
715     if (pcbddc->coarse_psi_B) {
716       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
717       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
718     }
719     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
720     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
721     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
722     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
723   }
724   /* free memory */
725   if (n_vertices) {
726     ierr = PetscFree(vertices);CHKERRQ(ierr);
727     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
728     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
731     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
732   }
733   if (n_constraints) {
734     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
735     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
736     ierr = MatDestroy(&M1);CHKERRQ(ierr);
737     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
738   }
739   ierr = PetscFree(auxindices);CHKERRQ(ierr);
740   /* get back data */
741   *coarse_submat_vals_n = coarse_submat_vals;
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
747 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
748 {
749   PC_IS*            pcis = (PC_IS*)(pc->data);
750   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
751   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
752   PetscBool         issbaij,isbaij;
753   /* manage repeated solves */
754   MatReuse          reuse;
755   MatStructure      matstruct;
756   PetscErrorCode    ierr;
757 
758   PetscFunctionBegin;
759   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
760     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
761   }
762   /* get mat flags */
763   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
764   reuse = MAT_INITIAL_MATRIX;
765   if (pc->setupcalled) {
766     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
767     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
768     if (matstruct == SAME_NONZERO_PATTERN) {
769       reuse = MAT_REUSE_MATRIX;
770     } else {
771       reuse = MAT_INITIAL_MATRIX;
772     }
773   }
774   if (reuse == MAT_INITIAL_MATRIX) {
775     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
776     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
777     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
778     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
779     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
780   }
781 
782   /* transform local matrices if needed */
783   if (pcbddc->use_change_of_basis) {
784     Mat         change_mat_all;
785     PetscScalar *row_cmat_values;
786     PetscInt    *row_cmat_indices;
787     PetscInt    *nnz,*is_indices,*temp_indices;
788     PetscInt    i,j,k,n_D,n_B;
789 
790     /* Get Non-overlapping dimensions */
791     n_B = pcis->n_B;
792     n_D = pcis->n-n_B;
793 
794     /* compute nonzero structure of change of basis on all local nodes */
795     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
796     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
797     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
798     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
799     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
800     k=1;
801     for (i=0;i<n_B;i++) {
802       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
803       nnz[is_indices[i]]=j;
804       if (k < j) k = j;
805       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
806     }
807     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
808     /* assemble change of basis matrix on the whole set of local dofs */
809     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
810     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
811     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
812     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
813     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
814     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
815     for (i=0;i<n_D;i++) {
816       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
817     }
818     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
819     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
820     for (i=0;i<n_B;i++) {
821       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
822       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
823       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
824       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
825     }
826     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
827     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
828     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
829     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
830     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
831     if (!issbaij && !isbaij) {
832       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
833     } else {
834       Mat work_mat;
835       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
836       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
837       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
838     }
839     /*
840     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
841     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
842     */
843     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
844     ierr = PetscFree(nnz);CHKERRQ(ierr);
845     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
846   } else {
847     /* without change of basis, the local matrix is unchanged */
848     if (!pcbddc->local_mat) {
849       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
850       pcbddc->local_mat = matis->A;
851     }
852   }
853 
854   /* get submatrices */
855   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
856   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
857   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
858   if (!issbaij) {
859     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
860     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
861   } else {
862     Mat newmat;
863     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
864     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
865     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
866     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
867   }
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
873 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
874 {
875   PC_IS*         pcis = (PC_IS*)(pc->data);
876   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
877   IS             is_aux1,is_aux2;
878   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
879   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
880   PetscInt       vbs,bs;
881   PetscBT        bitmask;
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885   /* No need to setup local scatters if primal space is unchanged */
886   if (!pcbddc->new_primal_space_local) {
887     PetscFunctionReturn(0);
888   }
889   /* destroy old objects */
890   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
891   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
892   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
893   /* Set Non-overlapping dimensions */
894   n_B = pcis->n_B; n_D = pcis->n - n_B;
895   /* get vertex indices from constraint matrix */
896   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
897 
898   /* create auxiliary bitmask */
899   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
900   for (i=0;i<n_vertices;i++) {
901     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
902   }
903 
904   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
905   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
906   for (i=0, n_R=0; i<pcis->n; i++) {
907     if (!PetscBTLookup(bitmask,i)) {
908       idx_R_local[n_R] = i;
909       n_R++;
910     }
911   }
912 
913   /* Block code */
914   vbs = 1;
915   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
916   if (bs>1 && !(n_vertices%bs)) {
917     PetscBool is_blocked = PETSC_TRUE;
918     PetscInt  *vary;
919     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
920     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
921     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
922     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
923     for (i=0; i<n_vertices; i++) {
924       if (vary[i]!=0 && vary[i]!=bs) {
925         is_blocked = PETSC_FALSE;
926         break;
927       }
928     }
929     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
930       vbs = bs;
931       for (i=0;i<n_R/vbs;i++) {
932         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
933       }
934     }
935     ierr = PetscFree(vary);CHKERRQ(ierr);
936   }
937   ierr = PetscFree(vertices);CHKERRQ(ierr);
938   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
939   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
940 
941   /* print some info if requested */
942   if (pcbddc->dbg_flag) {
943     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
944     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
947     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
948     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
949     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
950   }
951 
952   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
953   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
954   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
955   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
956   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
957   for (i=0; i<n_D; i++) {
958     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
959   }
960   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
961   for (i=0, j=0; i<n_R; i++) {
962     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
963       aux_array1[j++] = i;
964     }
965   }
966   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
967   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   for (i=0, j=0; i<n_B; i++) {
969     if (!PetscBTLookup(bitmask,is_indices[i])) {
970       aux_array2[j++] = i;
971     }
972   }
973   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
974   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
975   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
976   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
977   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
978 
979   if (pcbddc->switch_static || pcbddc->dbg_flag) {
980     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
981     for (i=0, j=0; i<n_R; i++) {
982       if (PetscBTLookup(bitmask,idx_R_local[i])) {
983         aux_array1[j++] = i;
984       }
985     }
986     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
987     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
988     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
989   }
990   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
991   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
998 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
999 {
1000   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1001   PC_IS          *pcis = (PC_IS*)pc->data;
1002   PC             pc_temp;
1003   Mat            A_RR;
1004   Vec            vec1,vec2,vec3;
1005   MatStructure   matstruct;
1006   MatReuse       reuse;
1007   PetscScalar    m_one = -1.0;
1008   PetscReal      value;
1009   PetscInt       n_D,n_R,ibs,mbs;
1010   PetscBool      use_exact,use_exact_reduced,issbaij;
1011   PetscErrorCode ierr;
1012   /* prefixes stuff */
1013   char           dir_prefix[256],neu_prefix[256],str_level[3];
1014   size_t         len;
1015 
1016   PetscFunctionBegin;
1017   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1018 
1019   /* compute prefixes */
1020   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1021   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1022   if (!pcbddc->current_level) {
1023     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1024     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1025     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1026     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1027   } else {
1028     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1029     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1030     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1031     len -= 15; /* remove "pc_bddc_coarse_" */
1032     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1033     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1034     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1035     *(dir_prefix+len)='\0';
1036     *(neu_prefix+len)='\0';
1037     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1038     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1039     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1040     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1041   }
1042 
1043   /* DIRICHLET PROBLEM */
1044   /* Matrix for Dirichlet problem is pcis->A_II */
1045   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1046   if (!pcbddc->ksp_D) { /* create object if not yet build */
1047     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1048     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1049     /* default */
1050     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1051     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1052     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1053     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1054     if (issbaij) {
1055       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1056     } else {
1057       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1058     }
1059     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1060   }
1061   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1062   /* Allow user's customization */
1063   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1064   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1065   if (!n_D) {
1066     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1067     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1068   }
1069   /* Set Up KSP for Dirichlet problem of BDDC */
1070   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1071   /* set ksp_D into pcis data */
1072   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1073   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1074   pcis->ksp_D = pcbddc->ksp_D;
1075 
1076   /* NEUMANN PROBLEM */
1077   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1078   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1079   if (pcbddc->ksp_R) { /* already created ksp */
1080     PetscInt nn_R;
1081     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1082     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1083     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1084     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1085       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1086       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1087       reuse = MAT_INITIAL_MATRIX;
1088     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1089       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1090         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1091         reuse = MAT_INITIAL_MATRIX;
1092       } else { /* safe to reuse the matrix */
1093         reuse = MAT_REUSE_MATRIX;
1094       }
1095     }
1096     /* last check */
1097     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1098       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099       reuse = MAT_INITIAL_MATRIX;
1100     }
1101   } else { /* first time, so we need to create the matrix */
1102     reuse = MAT_INITIAL_MATRIX;
1103   }
1104   /* extract A_RR */
1105   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1106   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1107   if (ibs != mbs) {
1108     Mat newmat;
1109     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1110     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1111     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1112   } else {
1113     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1114   }
1115   if (!pcbddc->ksp_R) { /* create object if not present */
1116     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1117     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1118     /* default */
1119     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1120     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1121     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1122     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1123     if (issbaij) {
1124       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1125     } else {
1126       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1127     }
1128     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1129   }
1130   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1131   /* Allow user's customization */
1132   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1133   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1134   if (!n_R) {
1135     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1136     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1137   }
1138   /* Set Up KSP for Neumann problem of BDDC */
1139   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1140 
1141   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1142 
1143   /* Dirichlet */
1144   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1145   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1146   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1147   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1148   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1149   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1150   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1151   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1152   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1153   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1154   /* need to be adapted? */
1155   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1156   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1157   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1158   /* print info */
1159   if (pcbddc->dbg_flag) {
1160     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1161     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1162     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1163     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1164     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1165   }
1166   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1167     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1168   }
1169 
1170   /* Neumann */
1171   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1172   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1173   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1174   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1175   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1176   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1177   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1178   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1179   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1180   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1181   /* need to be adapted? */
1182   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1183   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1184   /* print info */
1185   if (pcbddc->dbg_flag) {
1186     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1187     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1188   }
1189   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1190     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1191   }
1192   /* free Neumann problem's matrix */
1193   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1199 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1200 {
1201   PetscErrorCode ierr;
1202   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1203 
1204   PetscFunctionBegin;
1205   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1206   if (pcbddc->local_auxmat1) {
1207     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1208     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1215 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1216 {
1217   PetscErrorCode ierr;
1218   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1219   PC_IS*            pcis = (PC_IS*)  (pc->data);
1220   const PetscScalar zero = 0.0;
1221 
1222   PetscFunctionBegin;
1223   /* Application of PHI^T (or PSI^T)  */
1224   if (pcbddc->coarse_psi_B) {
1225     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1226     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1227   } else {
1228     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1229     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1230   }
1231   /* Scatter data of coarse_rhs */
1232   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1233   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234 
1235   /* Local solution on R nodes */
1236   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1237   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1239   if (pcbddc->switch_static) {
1240     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1241     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1242   }
1243   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1244   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1245   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247   if (pcbddc->switch_static) {
1248     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250   }
1251 
1252   /* Coarse solution */
1253   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1255     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1256   }
1257   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1259 
1260   /* Sum contributions from two levels */
1261   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1262   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1268 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1269 {
1270   PetscErrorCode ierr;
1271   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1272 
1273   PetscFunctionBegin;
1274   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1280 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1281 {
1282   PetscErrorCode ierr;
1283   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1284 
1285   PetscFunctionBegin;
1286   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 /* uncomment for testing purposes */
1291 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1292 #undef __FUNCT__
1293 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1294 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1295 {
1296   PetscErrorCode    ierr;
1297   PC_IS*            pcis = (PC_IS*)(pc->data);
1298   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1299   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1300   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1301   MatType           impMatType=MATSEQAIJ;
1302   /* one and zero */
1303   PetscScalar       one=1.0,zero=0.0;
1304   /* space to store constraints and their local indices */
1305   PetscScalar       *temp_quadrature_constraint;
1306   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1307   /* iterators */
1308   PetscInt          i,j,k,total_counts,temp_start_ptr;
1309   /* stuff to store connected components stored in pcbddc->mat_graph */
1310   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1311   PetscInt          n_ISForFaces,n_ISForEdges;
1312   /* near null space stuff */
1313   MatNullSpace      nearnullsp;
1314   const Vec         *nearnullvecs;
1315   Vec               *localnearnullsp;
1316   PetscBool         nnsp_has_cnst;
1317   PetscInt          nnsp_size;
1318   PetscScalar       *array;
1319   /* BLAS integers */
1320   PetscBLASInt      lwork,lierr;
1321   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1322   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1323   /* LAPACK working arrays for SVD or POD */
1324   PetscBool         skip_lapack;
1325   PetscScalar       *work;
1326   PetscReal         *singular_vals;
1327 #if defined(PETSC_USE_COMPLEX)
1328   PetscReal         *rwork;
1329 #endif
1330 #if defined(PETSC_MISSING_LAPACK_GESVD)
1331   PetscBLASInt      Blas_one_2=1;
1332   PetscScalar       *temp_basis,*correlation_mat;
1333 #else
1334   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1335   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1336 #endif
1337   /* reuse */
1338   PetscInt          olocal_primal_size;
1339   PetscInt          *oprimal_indices_local_idxs;
1340   /* change of basis */
1341   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1342   PetscBool         boolforchange,qr_needed;
1343   PetscBT           touched,change_basis,qr_needed_idx;
1344   /* auxiliary stuff */
1345   PetscInt          *nnz,*is_indices,*local_to_B;
1346   /* some quantities */
1347   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1348   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1349 
1350 
1351   PetscFunctionBegin;
1352   /* Destroy Mat objects computed previously */
1353   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1354   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1355   /* Get index sets for faces, edges and vertices from graph */
1356   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1357     pcbddc->use_vertices = PETSC_TRUE;
1358   }
1359   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1360   /* HACK: provide functions to set change of basis */
1361   if (!ISForVertices && pcbddc->NullSpace) {
1362     pcbddc->use_change_of_basis = PETSC_TRUE;
1363     pcbddc->use_change_on_faces = PETSC_FALSE;
1364   }
1365   /* print some info */
1366   if (pcbddc->dbg_flag) {
1367     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1368     i = 0;
1369     if (ISForVertices) {
1370       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1371     }
1372     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1373     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1374     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1375     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1376   }
1377   /* check if near null space is attached to global mat */
1378   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1379   if (nearnullsp) {
1380     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1381     /* remove any stored info */
1382     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1383     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1384     /* store information for BDDC solver reuse */
1385     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1386     pcbddc->onearnullspace = nearnullsp;
1387     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1388     for (i=0;i<nnsp_size;i++) {
1389       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1390     }
1391   } else { /* if near null space is not provided BDDC uses constants by default */
1392     nnsp_size = 0;
1393     nnsp_has_cnst = PETSC_TRUE;
1394   }
1395   /* get max number of constraints on a single cc */
1396   max_constraints = nnsp_size;
1397   if (nnsp_has_cnst) max_constraints++;
1398 
1399   /*
1400        Evaluate maximum storage size needed by the procedure
1401        - temp_indices will contain start index of each constraint stored as follows
1402        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1403        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1404        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1405                                                                                                                                                          */
1406   total_counts = n_ISForFaces+n_ISForEdges;
1407   total_counts *= max_constraints;
1408   n_vertices = 0;
1409   if (ISForVertices) {
1410     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1411   }
1412   total_counts += n_vertices;
1413   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1414   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1415   total_counts = 0;
1416   max_size_of_constraint = 0;
1417   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1418     if (i<n_ISForEdges) {
1419       used_IS = &ISForEdges[i];
1420     } else {
1421       used_IS = &ISForFaces[i-n_ISForEdges];
1422     }
1423     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1424     total_counts += j;
1425     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1426   }
1427   total_counts *= max_constraints;
1428   total_counts += n_vertices;
1429   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1430   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1431   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1432   /* local to boundary numbering */
1433   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1434   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1435   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1436   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1437   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1438   /* get local part of global near null space vectors */
1439   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1440   for (k=0;k<nnsp_size;k++) {
1441     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1442     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1444   }
1445 
1446   /* whether or not to skip lapack calls */
1447   skip_lapack = PETSC_TRUE;
1448   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1449 
1450   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1451   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1452     PetscScalar temp_work;
1453 #if defined(PETSC_MISSING_LAPACK_GESVD)
1454     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1455     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1456     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1457     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1458 #if defined(PETSC_USE_COMPLEX)
1459     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1460 #endif
1461     /* now we evaluate the optimal workspace using query with lwork=-1 */
1462     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1463     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1464     lwork = -1;
1465     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1466 #if !defined(PETSC_USE_COMPLEX)
1467     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1468 #else
1469     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1470 #endif
1471     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1472     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1473 #else /* on missing GESVD */
1474     /* SVD */
1475     PetscInt max_n,min_n;
1476     max_n = max_size_of_constraint;
1477     min_n = max_constraints;
1478     if (max_size_of_constraint < max_constraints) {
1479       min_n = max_size_of_constraint;
1480       max_n = max_constraints;
1481     }
1482     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1483 #if defined(PETSC_USE_COMPLEX)
1484     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1485 #endif
1486     /* now we evaluate the optimal workspace using query with lwork=-1 */
1487     lwork = -1;
1488     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1489     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1490     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1491     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1492 #if !defined(PETSC_USE_COMPLEX)
1493     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1494 #else
1495     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1496 #endif
1497     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1498     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1499 #endif /* on missing GESVD */
1500     /* Allocate optimal workspace */
1501     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1502     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1503   }
1504   /* Now we can loop on constraining sets */
1505   total_counts = 0;
1506   temp_indices[0] = 0;
1507   /* vertices */
1508   if (ISForVertices) {
1509     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1510     if (nnsp_has_cnst) { /* consider all vertices */
1511       for (i=0;i<n_vertices;i++) {
1512         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1513         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1514         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1515         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1516         total_counts++;
1517       }
1518     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1519       PetscBool used_vertex;
1520       for (i=0;i<n_vertices;i++) {
1521         used_vertex = PETSC_FALSE;
1522         k = 0;
1523         while (!used_vertex && k<nnsp_size) {
1524           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1525           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1526             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1527             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1528             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1529             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1530             total_counts++;
1531             used_vertex = PETSC_TRUE;
1532           }
1533           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1534           k++;
1535         }
1536       }
1537     }
1538     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1539     n_vertices = total_counts;
1540   }
1541 
1542   /* edges and faces */
1543   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1544     if (i<n_ISForEdges) {
1545       used_IS = &ISForEdges[i];
1546       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1547     } else {
1548       used_IS = &ISForFaces[i-n_ISForEdges];
1549       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1550     }
1551     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1552     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1553     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1554     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1555     /* change of basis should not be performed on local periodic nodes */
1556     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1557     if (nnsp_has_cnst) {
1558       PetscScalar quad_value;
1559       temp_constraints++;
1560       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1561       for (j=0;j<size_of_constraint;j++) {
1562         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1563         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1564         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1565       }
1566       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1567       total_counts++;
1568     }
1569     for (k=0;k<nnsp_size;k++) {
1570       PetscReal real_value;
1571       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1572       for (j=0;j<size_of_constraint;j++) {
1573         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1574         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1575         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1576       }
1577       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1578       /* check if array is null on the connected component */
1579       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1580       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1581       if (real_value > 0.0) { /* keep indices and values */
1582         temp_constraints++;
1583         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1584         total_counts++;
1585       }
1586     }
1587     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1588     valid_constraints = temp_constraints;
1589     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1590     if (!pcbddc->use_nnsp_true && temp_constraints) {
1591       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1592 
1593 #if defined(PETSC_MISSING_LAPACK_GESVD)
1594       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1595          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1596          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1597             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1598             from that computed using LAPACKgesvd
1599          -> This is due to a different computation of eigenvectors in LAPACKheev
1600          -> The quality of the POD-computed basis will be the same */
1601       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1602       /* Store upper triangular part of correlation matrix */
1603       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1604       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1605       for (j=0;j<temp_constraints;j++) {
1606         for (k=0;k<j+1;k++) {
1607           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1608         }
1609       }
1610       /* compute eigenvalues and eigenvectors of correlation matrix */
1611       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1612       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1613 #if !defined(PETSC_USE_COMPLEX)
1614       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1615 #else
1616       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1617 #endif
1618       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1619       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1620       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1621       j=0;
1622       while (j < temp_constraints && singular_vals[j] < tol) j++;
1623       total_counts=total_counts-j;
1624       valid_constraints = temp_constraints-j;
1625       /* scale and copy POD basis into used quadrature memory */
1626       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1627       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1628       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1629       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1630       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1631       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1632       if (j<temp_constraints) {
1633         PetscInt ii;
1634         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1635         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1636         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1637         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1638         for (k=0;k<temp_constraints-j;k++) {
1639           for (ii=0;ii<size_of_constraint;ii++) {
1640             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1641           }
1642         }
1643       }
1644 #else  /* on missing GESVD */
1645       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1646       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1647       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1648       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1649 #if !defined(PETSC_USE_COMPLEX)
1650       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1651 #else
1652       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1653 #endif
1654       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1655       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1656       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1657       k = temp_constraints;
1658       if (k > size_of_constraint) k = size_of_constraint;
1659       j = 0;
1660       while (j < k && singular_vals[k-j-1] < tol) j++;
1661       total_counts = total_counts-temp_constraints+k-j;
1662       valid_constraints = k-j;
1663 #endif /* on missing GESVD */
1664     }
1665     /* setting change_of_basis flag is safe now */
1666     if (boolforchange) {
1667       for (j=0;j<valid_constraints;j++) {
1668         PetscBTSet(change_basis,total_counts-j-1);
1669       }
1670     }
1671   }
1672   /* free index sets of faces, edges and vertices */
1673   for (i=0;i<n_ISForFaces;i++) {
1674     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1675   }
1676   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1677   for (i=0;i<n_ISForEdges;i++) {
1678     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1679   }
1680   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1681   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1682 
1683   /* free workspace */
1684   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1685     ierr = PetscFree(work);CHKERRQ(ierr);
1686 #if defined(PETSC_USE_COMPLEX)
1687     ierr = PetscFree(rwork);CHKERRQ(ierr);
1688 #endif
1689     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1690 #if defined(PETSC_MISSING_LAPACK_GESVD)
1691     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1692     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1693 #endif
1694   }
1695   for (k=0;k<nnsp_size;k++) {
1696     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1697   }
1698   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1699 
1700   /* set quantities in pcbddc data structure and store previous primal size */
1701   /* n_vertices defines the number of subdomain corners in the primal space */
1702   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1703   olocal_primal_size = pcbddc->local_primal_size;
1704   pcbddc->local_primal_size = total_counts;
1705   pcbddc->n_vertices = n_vertices;
1706   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1707 
1708   /* Create constraint matrix */
1709   /* The constraint matrix is used to compute the l2g map of primal dofs */
1710   /* so we need to set it up properly either with or without change of basis */
1711   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1712   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1713   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1714   /* array to compute a local numbering of constraints : vertices first then constraints */
1715   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1716   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1717   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1718   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1719   /* auxiliary stuff for basis change */
1720   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1721   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1722 
1723   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1724   total_primal_vertices=0;
1725   for (i=0;i<pcbddc->local_primal_size;i++) {
1726     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1727     if (size_of_constraint == 1) {
1728       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1729       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1730       aux_primal_minloc[total_primal_vertices]=0;
1731       total_primal_vertices++;
1732     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1733       PetscInt min_loc,min_index;
1734       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1735       /* find first untouched local node */
1736       k = 0;
1737       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1738       min_index = global_indices[k];
1739       min_loc = k;
1740       /* search the minimum among global nodes already untouched on the cc */
1741       for (k=1;k<size_of_constraint;k++) {
1742         /* there can be more than one constraint on a single connected component */
1743         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1744           min_index = global_indices[k];
1745           min_loc = k;
1746         }
1747       }
1748       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1749       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1750       aux_primal_minloc[total_primal_vertices]=min_loc;
1751       total_primal_vertices++;
1752     }
1753   }
1754   /* determine if a QR strategy is needed for change of basis */
1755   qr_needed = PETSC_FALSE;
1756   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1757   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1758     if (PetscBTLookup(change_basis,i)) {
1759       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1760       j = 0;
1761       for (k=0;k<size_of_constraint;k++) {
1762         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1763           j++;
1764         }
1765       }
1766       /* found more than one primal dof on the cc */
1767       if (j > 1) {
1768         PetscBTSet(qr_needed_idx,i);
1769         qr_needed = PETSC_TRUE;
1770       }
1771     }
1772   }
1773   /* free workspace */
1774   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1775 
1776   /* permute indices in order to have a sorted set of vertices */
1777   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1778 
1779   /* nonzero structure of constraint matrix */
1780   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1781   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1782   j=total_primal_vertices;
1783   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1784     if (!PetscBTLookup(change_basis,i)) {
1785       nnz[j]=temp_indices[i+1]-temp_indices[i];
1786       j++;
1787     }
1788   }
1789   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1790   ierr = PetscFree(nnz);CHKERRQ(ierr);
1791   /* set values in constraint matrix */
1792   for (i=0;i<total_primal_vertices;i++) {
1793     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1794   }
1795   total_counts = total_primal_vertices;
1796   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1797     if (!PetscBTLookup(change_basis,i)) {
1798       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1799       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1800       total_counts++;
1801     }
1802   }
1803   /* assembling */
1804   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1805   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1806   /*
1807   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1808   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1809   */
1810   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1811   if (pcbddc->use_change_of_basis) {
1812     /* dual and primal dofs on a single cc */
1813     PetscInt     dual_dofs,primal_dofs;
1814     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1815     PetscInt     primal_counter;
1816     /* working stuff for GEQRF */
1817     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1818     PetscBLASInt lqr_work;
1819     /* working stuff for UNGQR */
1820     PetscScalar  *gqr_work,lgqr_work_t;
1821     PetscBLASInt lgqr_work;
1822     /* working stuff for TRTRS */
1823     PetscScalar  *trs_rhs;
1824     PetscBLASInt Blas_NRHS;
1825     /* pointers for values insertion into change of basis matrix */
1826     PetscInt     *start_rows,*start_cols;
1827     PetscScalar  *start_vals;
1828     /* working stuff for values insertion */
1829     PetscBT      is_primal;
1830 
1831     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1832     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1833     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1834     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1835     /* work arrays */
1836     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1837     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1838     /* nonzeros per row */
1839     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1840       if (PetscBTLookup(change_basis,i)) {
1841         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1842         if (PetscBTLookup(qr_needed_idx,i)) {
1843           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1844         } else {
1845           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1846           /* get local primal index on the cc */
1847           j = 0;
1848           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1849           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1850         }
1851       }
1852     }
1853     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1854     ierr = PetscFree(nnz);CHKERRQ(ierr);
1855     /* Set initial identity in the matrix */
1856     for (i=0;i<pcis->n_B;i++) {
1857       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1858     }
1859 
1860     if (pcbddc->dbg_flag) {
1861       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1862       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1863     }
1864 
1865 
1866     /* Now we loop on the constraints which need a change of basis */
1867     /*
1868        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1869        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1870 
1871        Basic blocks of change of basis matrix T computed
1872 
1873           - Using the following block transformation if there is only a primal dof on the cc
1874             (in the example, primal dof is the last one of the edge in LOCAL ordering
1875              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1876             | 1        0   ...        0     1 |
1877             | 0        1   ...        0     1 |
1878             |              ...                |
1879             | 0        ...            1     1 |
1880             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1881 
1882           - via QR decomposition of constraints otherwise
1883     */
1884     if (qr_needed) {
1885       /* space to store Q */
1886       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1887       /* first we issue queries for optimal work */
1888       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1889       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1890       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1891       lqr_work = -1;
1892       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1893       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1894       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1895       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1896       lgqr_work = -1;
1897       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1898       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1899       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1900       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1901       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1902       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1903       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1904       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1905       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1906       /* array to store scaling factors for reflectors */
1907       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1908       /* array to store rhs and solution of triangular solver */
1909       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1910       /* allocating workspace for check */
1911       if (pcbddc->dbg_flag) {
1912         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1913       }
1914     }
1915     /* array to store whether a node is primal or not */
1916     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1917     for (i=0;i<total_primal_vertices;i++) {
1918       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1919     }
1920 
1921     /* loop on constraints and see whether or not they need a change of basis and compute it */
1922     /* -> using implicit ordering contained in temp_indices data */
1923     total_counts = pcbddc->n_vertices;
1924     primal_counter = total_counts;
1925     while (total_counts<pcbddc->local_primal_size) {
1926       primal_dofs = 1;
1927       if (PetscBTLookup(change_basis,total_counts)) {
1928         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1929         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1930           primal_dofs++;
1931         }
1932         /* get constraint info */
1933         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1934         dual_dofs = size_of_constraint-primal_dofs;
1935 
1936         if (pcbddc->dbg_flag) {
1937           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1938         }
1939 
1940         if (primal_dofs > 1) { /* QR */
1941 
1942           /* copy quadrature constraints for change of basis check */
1943           if (pcbddc->dbg_flag) {
1944             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1945           }
1946           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1947           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1948 
1949           /* compute QR decomposition of constraints */
1950           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1951           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1952           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1953           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1954           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1955           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1956           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1957 
1958           /* explictly compute R^-T */
1959           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1960           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1961           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1962           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1963           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1964           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1965           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1966           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1967           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1968           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1969 
1970           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1971           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1972           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1973           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1974           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1975           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1976           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1977           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1978           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1979 
1980           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1981              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1982              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1983           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1984           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1985           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1986           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1987           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1988           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1989           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1990           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1991           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1992           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1993 
1994           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1995           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1996           /* insert cols for primal dofs */
1997           for (j=0;j<primal_dofs;j++) {
1998             start_vals = &qr_basis[j*size_of_constraint];
1999             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2000             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2001           }
2002           /* insert cols for dual dofs */
2003           for (j=0,k=0;j<dual_dofs;k++) {
2004             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2005               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2006               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2007               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2008               j++;
2009             }
2010           }
2011 
2012           /* check change of basis */
2013           if (pcbddc->dbg_flag) {
2014             PetscInt   ii,jj;
2015             PetscBool valid_qr=PETSC_TRUE;
2016             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2017             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2018             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2019             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2020             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2021             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2022             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2023             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2024             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2025             for (jj=0;jj<size_of_constraint;jj++) {
2026               for (ii=0;ii<primal_dofs;ii++) {
2027                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2028                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2029               }
2030             }
2031             if (!valid_qr) {
2032               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2033               for (jj=0;jj<size_of_constraint;jj++) {
2034                 for (ii=0;ii<primal_dofs;ii++) {
2035                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2036                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2037                   }
2038                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2039                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2040                   }
2041                 }
2042               }
2043             } else {
2044               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2045             }
2046           }
2047         } else { /* simple transformation block */
2048           PetscInt row,col;
2049           PetscScalar val;
2050           for (j=0;j<size_of_constraint;j++) {
2051             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2052             if (!PetscBTLookup(is_primal,row)) {
2053               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2054               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2055               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2056             } else {
2057               for (k=0;k<size_of_constraint;k++) {
2058                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2059                 if (row != col) {
2060                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2061                 } else {
2062                   val = 1.0;
2063                 }
2064                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2065               }
2066             }
2067           }
2068         }
2069         /* increment primal counter */
2070         primal_counter += primal_dofs;
2071       } else {
2072         if (pcbddc->dbg_flag) {
2073           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2074         }
2075       }
2076       /* increment constraint counter total_counts */
2077       total_counts += primal_dofs;
2078     }
2079 
2080     /* free workspace */
2081     if (qr_needed) {
2082       if (pcbddc->dbg_flag) {
2083         ierr = PetscFree(work);CHKERRQ(ierr);
2084       }
2085       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2086       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2087       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2088       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2089       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2090     }
2091     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2092     /* assembling */
2093     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2094     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095     /*
2096     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2097     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2098     */
2099   }
2100 
2101   /* get indices in local ordering for vertices and constraints */
2102   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2103     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2104     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2105   }
2106   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2107   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2108   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2109   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2110   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2111   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2112   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2113   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2114   /* check if a new primal space has been introduced */
2115   pcbddc->new_primal_space_local = PETSC_TRUE;
2116   if (olocal_primal_size == pcbddc->local_primal_size) {
2117     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2118     pcbddc->new_primal_space_local = !pcbddc->new_primal_space_local;
2119     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2120   }
2121   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2122   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2123 
2124   /* flush dbg viewer */
2125   if (pcbddc->dbg_flag) {
2126     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2127   }
2128 
2129   /* free workspace */
2130   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2131   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2132   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2133   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2134   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2135   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2136   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2137   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2138   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2144 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2145 {
2146   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2147   PC_IS       *pcis = (PC_IS*)pc->data;
2148   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2149   PetscInt    bs,ierr,i,vertex_size;
2150   PetscViewer viewer=pcbddc->dbg_viewer;
2151 
2152   PetscFunctionBegin;
2153   /* Reset previously computed graph */
2154   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2155   /* Init local Graph struct */
2156   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2157 
2158   /* Check validity of the csr graph passed in by the user */
2159   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2160     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2161   }
2162 
2163   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2164   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2165     Mat mat_adj;
2166     const PetscInt *xadj,*adjncy;
2167     PetscBool flg_row=PETSC_TRUE;
2168 
2169     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2170     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2171     if (!flg_row) {
2172       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2173     }
2174     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2175     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2176     if (!flg_row) {
2177       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2178     }
2179     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2180   }
2181 
2182   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2183   vertex_size = 1;
2184   if (!pcbddc->user_provided_isfordofs) {
2185     if (!pcbddc->n_ISForDofs) {
2186       IS *custom_ISForDofs;
2187 
2188       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2189       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2190       for (i=0;i<bs;i++) {
2191         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2192       }
2193       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2194       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2195       /* remove my references to IS objects */
2196       for (i=0;i<bs;i++) {
2197         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2198       }
2199       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2200     }
2201   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2202     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2203   }
2204 
2205   /* Setup of Graph */
2206   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2207 
2208   /* Graph's connected components analysis */
2209   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2210 
2211   /* print some info to stdout */
2212   if (pcbddc->dbg_flag) {
2213     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2214   }
2215 
2216   /* mark topography has done */
2217   pcbddc->recompute_topography = PETSC_FALSE;
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2223 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2224 {
2225   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2226   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2227   PetscErrorCode ierr;
2228 
2229   PetscFunctionBegin;
2230   n = 0;
2231   vertices = 0;
2232   if (pcbddc->ConstraintMatrix) {
2233     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2234     for (i=0;i<local_primal_size;i++) {
2235       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2236       if (size_of_constraint == 1) n++;
2237       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2238     }
2239     if (vertices_idx) {
2240       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2241       n = 0;
2242       for (i=0;i<local_primal_size;i++) {
2243         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2244         if (size_of_constraint == 1) {
2245           vertices[n++]=row_cmat_indices[0];
2246         }
2247         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2248       }
2249     }
2250   }
2251   *n_vertices = n;
2252   if (vertices_idx) *vertices_idx = vertices;
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2258 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2259 {
2260   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2261   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2262   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2263   PetscBT        touched;
2264   PetscErrorCode ierr;
2265 
2266     /* This function assumes that the number of local constraints per connected component
2267        is not greater than the number of nodes defined for the connected component
2268        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2269   PetscFunctionBegin;
2270   n = 0;
2271   constraints_index = 0;
2272   if (pcbddc->ConstraintMatrix) {
2273     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2274     max_size_of_constraint = 0;
2275     for (i=0;i<local_primal_size;i++) {
2276       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2277       if (size_of_constraint > 1) {
2278         n++;
2279       }
2280       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2281       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2282     }
2283     if (constraints_idx) {
2284       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2285       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2286       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2287       n = 0;
2288       for (i=0;i<local_primal_size;i++) {
2289         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2290         if (size_of_constraint > 1) {
2291           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2292           /* find first untouched local node */
2293           j = 0;
2294           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2295           min_index = row_cmat_global_indices[j];
2296           min_loc = j;
2297           /* search the minimum among nodes not yet touched on the connected component
2298              since there can be more than one constraint on a single cc */
2299           for (j=1;j<size_of_constraint;j++) {
2300             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2301               min_index = row_cmat_global_indices[j];
2302               min_loc = j;
2303             }
2304           }
2305           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2306           constraints_index[n++] = row_cmat_indices[min_loc];
2307         }
2308         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2309       }
2310       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2311       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2312     }
2313   }
2314   *n_constraints = n;
2315   if (constraints_idx) *constraints_idx = constraints_index;
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 /* the next two functions has been adapted from pcis.c */
2320 #undef __FUNCT__
2321 #define __FUNCT__ "PCBDDCApplySchur"
2322 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2323 {
2324   PetscErrorCode ierr;
2325   PC_IS          *pcis = (PC_IS*)(pc->data);
2326 
2327   PetscFunctionBegin;
2328   if (!vec2_B) { vec2_B = v; }
2329   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2330   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2331   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2332   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2333   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2339 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2340 {
2341   PetscErrorCode ierr;
2342   PC_IS          *pcis = (PC_IS*)(pc->data);
2343 
2344   PetscFunctionBegin;
2345   if (!vec2_B) { vec2_B = v; }
2346   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2347   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2348   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2349   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2350   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "PCBDDCSubsetNumbering"
2356 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2357 {
2358   Vec            local_vec,global_vec;
2359   IS             seqis,paris;
2360   VecScatter     scatter_ctx;
2361   PetscScalar    *array;
2362   PetscInt       *temp_global_dofs;
2363   PetscScalar    globalsum;
2364   PetscInt       i,j,s;
2365   PetscInt       nlocals,first_index,old_index,max_local;
2366   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2367   PetscMPIInt    *dof_sizes,*dof_displs;
2368   PetscBool      first_found;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   /* mpi buffers */
2373   MPI_Comm_size(comm,&size_prec_comm);
2374   MPI_Comm_rank(comm,&rank_prec_comm);
2375   j = ( !rank_prec_comm ? size_prec_comm : 0);
2376   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2377   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2378   /* get maximum size of subset */
2379   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2380   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2381   max_local = 0;
2382   if (n_local_dofs) {
2383     max_local = temp_global_dofs[0];
2384     for (i=1;i<n_local_dofs;i++) {
2385       if (max_local < temp_global_dofs[i] ) {
2386         max_local = temp_global_dofs[i];
2387       }
2388     }
2389   }
2390   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2391   max_global++;
2392   max_local = 0;
2393   if (n_local_dofs) {
2394     max_local = local_dofs[0];
2395     for (i=1;i<n_local_dofs;i++) {
2396       if (max_local < local_dofs[i] ) {
2397         max_local = local_dofs[i];
2398       }
2399     }
2400   }
2401   max_local++;
2402   /* allocate workspace */
2403   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2404   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2405   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2406   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2407   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2408   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2409   /* create scatter */
2410   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2411   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2412   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2413   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2414   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2415   /* init array */
2416   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2417   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2418   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2419   if (local_dofs_mult) {
2420     for (i=0;i<n_local_dofs;i++) {
2421       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2422     }
2423   } else {
2424     for (i=0;i<n_local_dofs;i++) {
2425       array[local_dofs[i]]=1.0;
2426     }
2427   }
2428   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2429   /* scatter into global vec and get total number of global dofs */
2430   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2431   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2432   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2433   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2434   /* Fill global_vec with cumulative function for global numbering */
2435   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2436   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2437   nlocals = 0;
2438   first_index = -1;
2439   first_found = PETSC_FALSE;
2440   for (i=0;i<s;i++) {
2441     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2442       first_found = PETSC_TRUE;
2443       first_index = i;
2444     }
2445     nlocals += (PetscInt)PetscRealPart(array[i]);
2446   }
2447   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2448   if (!rank_prec_comm) {
2449     dof_displs[0]=0;
2450     for (i=1;i<size_prec_comm;i++) {
2451       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2452     }
2453   }
2454   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2455   if (first_found) {
2456     array[first_index] += (PetscScalar)nlocals;
2457     old_index = first_index;
2458     for (i=first_index+1;i<s;i++) {
2459       if (PetscRealPart(array[i]) > 0.0) {
2460         array[i] += array[old_index];
2461         old_index = i;
2462       }
2463     }
2464   }
2465   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2466   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2467   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2468   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2469   /* get global ordering of local dofs */
2470   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2471   if (local_dofs_mult) {
2472     for (i=0;i<n_local_dofs;i++) {
2473       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2474     }
2475   } else {
2476     for (i=0;i<n_local_dofs;i++) {
2477       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2478     }
2479   }
2480   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2481   /* free workspace */
2482   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2483   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2484   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2485   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2486   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2487   /* return pointer to global ordering of local dofs */
2488   *global_numbering_subset = temp_global_dofs;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2494 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2495 {
2496   PetscInt       i,j;
2497   PetscScalar    *alphas;
2498   PetscErrorCode ierr;
2499 
2500   PetscFunctionBegin;
2501   /* this implements stabilized Gram-Schmidt */
2502   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2503   for (i=0;i<n;i++) {
2504     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2505     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2506     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2507   }
2508   ierr = PetscFree(alphas);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatConvert_IS_AIJ"
2515 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2516 {
2517   Mat new_mat;
2518   Mat_IS *matis = (Mat_IS*)(mat->data);
2519   /* info on mat */
2520   PetscInt bs,rows,cols;
2521   PetscInt lrows,lcols;
2522   PetscInt local_rows,local_cols;
2523   PetscMPIInt nsubdomains;
2524   /* preallocation */
2525   Vec vec_dnz,vec_onz;
2526   PetscScalar *my_dnz,*my_onz,*array;
2527   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2528   PetscInt  index_row,index_col,owner;
2529   PetscInt  *local_indices,*global_indices;
2530   /* work */
2531   PetscInt i,j;
2532   PetscErrorCode ierr;
2533 
2534   PetscFunctionBegin;
2535   /* CHECKS */
2536   /* get info from mat */
2537   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2538   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2539   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2540   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2541 
2542   /* MAT_INITIAL_MATRIX */
2543   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2544   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2545   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2546   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2547   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2548   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2549 
2550   /*
2551     preallocation
2552   */
2553 
2554   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2555   /*
2556      Some vectors are needed to sum up properly on shared interface dofs.
2557      Note that preallocation is not exact, since it overestimates nonzeros
2558   */
2559 /*
2560   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2561   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2562   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2563   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2564 */
2565   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2566   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2567   /* All processes need to compute entire row ownership */
2568   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2569   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2570   for (i=0;i<nsubdomains;i++) {
2571     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2572       row_ownership[j]=i;
2573     }
2574   }
2575   /* map indices of local mat to global values */
2576   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2577   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2578   for (i=0;i<local_rows;i++) local_indices[i]=i;
2579   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2580   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2581 
2582   /*
2583      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2584      then, they will be summed up properly. This way, preallocation is always sufficient
2585   */
2586   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2587   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2588   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2589   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2590   for (i=0;i<local_rows;i++) {
2591     index_row = global_indices[i];
2592     for (j=i;j<local_rows;j++) {
2593       owner = row_ownership[index_row];
2594       index_col = global_indices[j];
2595       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2596         my_dnz[i] += 1.0;
2597       } else { /* offdiag block */
2598         my_onz[i] += 1.0;
2599       }
2600       /* same as before, interchanging rows and cols */
2601       if (i != j) {
2602         owner = row_ownership[index_col];
2603         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2604           my_dnz[j] += 1.0;
2605         } else {
2606           my_onz[j] += 1.0;
2607         }
2608       }
2609     }
2610   }
2611   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2612   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2613   if (local_rows) { /* multilevel guard */
2614     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2615     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2616   }
2617   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2618   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2619   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2620   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2621   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2622   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2623   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2624 
2625   /* set computed preallocation in dnz and onz */
2626   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2627   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2628   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2629   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2630   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2631   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2632   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2633   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2634 
2635   /* Resize preallocation if overestimated */
2636   for (i=0;i<lrows;i++) {
2637     dnz[i] = PetscMin(dnz[i],lcols);
2638     onz[i] = PetscMin(onz[i],cols-lcols);
2639   }
2640   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2641   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2642 
2643   /*
2644     Set values. Very Basic.
2645   */
2646   for (i=0;i<local_rows;i++) {
2647     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2648     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2649     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2650     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2651     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2652   }
2653   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2654   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2655   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2656 
2657   /* get back mat */
2658   *M = new_mat;
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatISSubassemble_Private"
2664 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2665 {
2666   Mat             subdomain_adj;
2667   IS              new_ranks,ranks_send_to;
2668   MatPartitioning partitioner;
2669   Mat_IS          *matis;
2670   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2671   PetscInt        prank;
2672   PetscMPIInt     size,rank,color;
2673   PetscInt        *xadj,*adjncy,*oldranks;
2674   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2675   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2676   PetscErrorCode  ierr;
2677   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2678   PetscSubcomm    subcomm;
2679 
2680   PetscFunctionBegin;
2681   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2682   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2683   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2684 
2685   /* Get info on mapping */
2686   matis = (Mat_IS*)(mat->data);
2687   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2688   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2689 
2690   /* build local CSR graph of subdomains' connectivity */
2691   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2692   xadj[0] = 0;
2693   xadj[1] = PetscMax(n_neighs-1,0);
2694   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2695   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2696 
2697   if (threshold) {
2698     PetscInt* count,min_threshold;
2699     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2700     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2701     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2702       for (j=0;j<n_shared[i];j++) {
2703         count[shared[i][j]] += 1;
2704       }
2705     }
2706     /* adapt threshold since we dont want to lose significant connections */
2707     min_threshold = n_neighs;
2708     for (i=1;i<n_neighs;i++) {
2709       for (j=0;j<n_shared[i];j++) {
2710         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2711       }
2712     }
2713     threshold = PetscMax(min_threshold+1,threshold);
2714     xadj[1] = 0;
2715     for (i=1;i<n_neighs;i++) {
2716       for (j=0;j<n_shared[i];j++) {
2717         if (count[shared[i][j]] < threshold) {
2718           adjncy[xadj[1]] = neighs[i];
2719           adjncy_wgt[xadj[1]] = n_shared[i];
2720           xadj[1]++;
2721           break;
2722         }
2723       }
2724     }
2725     ierr = PetscFree(count);CHKERRQ(ierr);
2726   } else {
2727     if (xadj[1]) {
2728       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2729       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2730     }
2731   }
2732   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2733   if (use_square) {
2734     for (i=0;i<xadj[1];i++) {
2735       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2736     }
2737   }
2738   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2739 
2740   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2741 
2742   /*
2743     Restrict work on active processes only.
2744   */
2745   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2746   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2747   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2748   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2749   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2750   if (color) {
2751     ierr = PetscFree(xadj);CHKERRQ(ierr);
2752     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2753     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2754   } else {
2755     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2756     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2757     prank = rank;
2758     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2759     for (i=0;i<size;i++) {
2760       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2761     }
2762     for (i=0;i<xadj[1];i++) {
2763       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2764     }
2765     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2766     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2767     n_subdomains = (PetscInt)size;
2768     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2769 
2770     /* Partition */
2771     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2772     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2773     if (use_vwgt) {
2774       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2775       v_wgt[0] = local_size;
2776       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2777     }
2778     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2779     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2780     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2781     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2782     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2783 
2784     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2785     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2786     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2787     /* clean up */
2788     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2789     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2790     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2791     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2792   }
2793   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2794 
2795   /* assemble parallel IS for sends */
2796   i = 1;
2797   if (color) i=0;
2798   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2799   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2800 
2801   /* get back IS */
2802   *is_sends = ranks_send_to;
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatISSubassemble"
2810 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2811 {
2812   Mat                    local_mat,new_mat;
2813   Mat_IS                 *matis;
2814   IS                     is_sends_internal;
2815   PetscInt               rows,cols;
2816   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2817   PetscBool              ismatis,isdense;
2818   ISLocalToGlobalMapping l2gmap;
2819   PetscInt*              l2gmap_indices;
2820   const PetscInt*        is_indices;
2821   MatType                new_local_type;
2822   MatTypePrivate         new_local_type_private;
2823   /* buffers */
2824   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2825   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2826   /* MPI */
2827   MPI_Comm               comm;
2828   PetscMPIInt            n_sends,n_recvs,commsize;
2829   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2830   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2831   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2832   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2833   PetscErrorCode         ierr;
2834 
2835   PetscFunctionBegin;
2836   /* checks */
2837   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2838   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2839   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2840   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2841   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2842   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2843   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2844   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2845   PetscValidLogicalCollectiveInt(mat,bs,0);
2846   /* prepare IS for sending if not provided */
2847   if (!is_sends) {
2848     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2849     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2850   } else {
2851     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2852     is_sends_internal = is_sends;
2853   }
2854 
2855   /* get pointer of MATIS data */
2856   matis = (Mat_IS*)mat->data;
2857 
2858   /* get comm */
2859   comm = PetscObjectComm((PetscObject)mat);
2860 
2861   /* compute number of sends */
2862   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2863   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2864 
2865   /* compute number of receives */
2866   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2867   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2868   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2869   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2870   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2871   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2872   ierr = PetscFree(iflags);CHKERRQ(ierr);
2873 
2874   /* prepare send/receive buffers */
2875   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2876   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2877   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2878   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2879 
2880   /* Get data from local mat */
2881   if (!isdense) {
2882     /* TODO: See below some guidelines on how to prepare the local buffers */
2883     /*
2884        send_buffer_vals should contain the raw values of the local matrix
2885        send_buffer_idxs should contain:
2886        - MatType_PRIVATE type
2887        - PetscInt        size_of_l2gmap
2888        - PetscInt        global_row_indices[size_of_l2gmap]
2889        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2890     */
2891   } else {
2892     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2893     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2894     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2895     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2896     send_buffer_idxs[1] = i;
2897     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2898     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2899     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2900     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2901     for (i=0;i<n_sends;i++) {
2902       ilengths_vals[is_indices[i]] = len*len;
2903       ilengths_idxs[is_indices[i]] = len+2;
2904     }
2905   }
2906   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2907   buf_size_idxs = 0;
2908   buf_size_vals = 0;
2909   for (i=0;i<n_recvs;i++) {
2910     buf_size_idxs += (PetscInt)olengths_idxs[i];
2911     buf_size_vals += (PetscInt)olengths_vals[i];
2912   }
2913   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2914   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2915 
2916   /* get new tags for clean communications */
2917   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2918   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2919 
2920   /* allocate for requests */
2921   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2922   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2923   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2924   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2925 
2926   /* communications */
2927   ptr_idxs = recv_buffer_idxs;
2928   ptr_vals = recv_buffer_vals;
2929   for (i=0;i<n_recvs;i++) {
2930     source_dest = onodes[i];
2931     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2932     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2933     ptr_idxs += olengths_idxs[i];
2934     ptr_vals += olengths_vals[i];
2935   }
2936   for (i=0;i<n_sends;i++) {
2937     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2938     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2939     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2940   }
2941   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2942   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2943 
2944   /* assemble new l2g map */
2945   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2946   ptr_idxs = recv_buffer_idxs;
2947   buf_size_idxs = 0;
2948   for (i=0;i<n_recvs;i++) {
2949     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2950     ptr_idxs += olengths_idxs[i];
2951   }
2952   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2953   ptr_idxs = recv_buffer_idxs;
2954   buf_size_idxs = 0;
2955   for (i=0;i<n_recvs;i++) {
2956     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2957     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2958     ptr_idxs += olengths_idxs[i];
2959   }
2960   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2961   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2962   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2963 
2964   /* infer new local matrix type from received local matrices type */
2965   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2966   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2967   new_local_type_private = MATAIJ_PRIVATE;
2968   new_local_type = MATSEQAIJ;
2969   if (n_recvs) {
2970     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2971     ptr_idxs = recv_buffer_idxs;
2972     for (i=0;i<n_recvs;i++) {
2973       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2974         new_local_type_private = MATAIJ_PRIVATE;
2975         break;
2976       }
2977       ptr_idxs += olengths_idxs[i];
2978     }
2979     switch (new_local_type_private) {
2980       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2981         new_local_type = MATSEQAIJ;
2982         bs = 1;
2983         break;
2984       case MATAIJ_PRIVATE:
2985         new_local_type = MATSEQAIJ;
2986         bs = 1;
2987         break;
2988       case MATBAIJ_PRIVATE:
2989         new_local_type = MATSEQBAIJ;
2990         break;
2991       case MATSBAIJ_PRIVATE:
2992         new_local_type = MATSEQSBAIJ;
2993         break;
2994       default:
2995         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2996         break;
2997     }
2998   }
2999 
3000   /* create MATIS object */
3001   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3002   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
3003   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3004   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
3005   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3006   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3007 
3008   /* set values */
3009   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3010   ptr_vals = recv_buffer_vals;
3011   ptr_idxs = recv_buffer_idxs;
3012   for (i=0;i<n_recvs;i++) {
3013     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3014       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3015       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3016       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3017       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3018       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3019     }
3020     ptr_idxs += olengths_idxs[i];
3021     ptr_vals += olengths_vals[i];
3022   }
3023   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3026   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3027 
3028   { /* check */
3029     Vec       lvec,rvec;
3030     PetscReal infty_error;
3031 
3032     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3033     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3034     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3035     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3036     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3037     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3038     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3039     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3040     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3041   }
3042 
3043   /* free workspace */
3044   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3045   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3046   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3047   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3048   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3049   if (isdense) {
3050     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3051     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3052   } else {
3053     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3054   }
3055   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3056   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3057   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3058   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3059   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3060   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3061   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3062   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3063   ierr = PetscFree(onodes);CHKERRQ(ierr);
3064   /* get back new mat */
3065   *mat_n = new_mat;
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3071 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3072 {
3073   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3074   PC_IS                  *pcis = (PC_IS*)pc->data;
3075   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3076   MatNullSpace           CoarseNullSpace=NULL;
3077   ISLocalToGlobalMapping coarse_islg;
3078   IS                     coarse_is;
3079   PetscInt               max_it;
3080   PetscInt               im_active=-1,active_procs=-1;
3081   PC                     pc_temp;
3082   PCType                 coarse_pc_type;
3083   KSPType                coarse_ksp_type;
3084   PetscBool              multilevel_requested,multilevel_allowed;
3085   PetscBool              setsym,issym,isbddc,isnn;
3086   MatStructure           matstruct;
3087   PetscErrorCode         ierr;
3088 
3089   PetscFunctionBegin;
3090   /* Assign global numbering to coarse dofs */
3091   if (pcbddc->new_primal_space) { /* this is suboptimal -> we can chech when creating local primal indices */
3092     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3093     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3094     /* see if we can avoid some work */
3095     if (pcbddc->coarse_ksp) { /* already present */
3096       Mat tcoarse_mat;
3097       PetscInt ocoarse_size;
3098       ierr = KSPGetOperators(pcbddc->coarse_ksp,NULL,&tcoarse_mat,NULL);CHKERRQ(ierr);
3099       ierr = MatGetSize(tcoarse_mat,&ocoarse_size,NULL);CHKERRQ(ierr);
3100       if (ocoarse_size != pcbddc->coarse_size) { /* we need to reset the KSP */
3101         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3102       }
3103     }
3104   }
3105 
3106   /* infer some info from user */
3107   issym = PETSC_FALSE;
3108   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3109   multilevel_allowed = PETSC_FALSE;
3110   multilevel_requested = PETSC_FALSE;
3111   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3112   if (multilevel_requested) {
3113     /* count "active processes" */
3114     im_active = !!(pcis->n);
3115     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3116     if (active_procs/pcbddc->coarsening_ratio < 2) {
3117       multilevel_allowed = PETSC_FALSE;
3118     } else {
3119       multilevel_allowed = PETSC_TRUE;
3120     }
3121   }
3122 
3123   /* set defaults for coarse KSP and PC */
3124   if (multilevel_allowed) {
3125     if (issym) {
3126       coarse_ksp_type = KSPRICHARDSON;
3127     } else {
3128       coarse_ksp_type = KSPCHEBYSHEV;
3129     }
3130     coarse_pc_type = PCBDDC;
3131   } else {
3132     coarse_ksp_type = KSPPREONLY;
3133     coarse_pc_type = PCREDUNDANT;
3134   }
3135 
3136   /* create the coarse KSP object only once with defaults */
3137   if (!pcbddc->coarse_ksp) {
3138     char prefix[256],str_level[3];
3139     size_t len;
3140     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3141     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3142     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3143     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3144     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3145     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3146     /* prefix */
3147     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3148     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3149     if (!pcbddc->current_level) {
3150       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3151       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3152     } else {
3153       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3154       if (pcbddc->current_level>1) len -= 2;
3155       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3156       *(prefix+len)='\0';
3157       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3158       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3159     }
3160     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3161   }
3162   /* allow user customization */
3163   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3164   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3165   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3166 
3167   /* get some info after set from options */
3168   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3169   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3170   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3171   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3172     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3173     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3174     isbddc = PETSC_FALSE;
3175   }
3176 
3177   /* propagate BDDC info to the next level */
3178   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3179   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3180   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3181 
3182   /* creates temporary MATIS object for coarse matrix */
3183   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3184   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3185   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3186   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3187   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3188   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3189   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3190   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3191   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3192 
3193   /* assemble coarse matrix */
3194   if (isbddc || isnn) {
3195     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3196   } else { /* need to provide reuse */
3197     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3198   }
3199   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3200 
3201   /* create local to global scatters for coarse problem */
3202   if (pcbddc->new_primal_space) {
3203     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3204     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3205     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3206     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3207     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3208   }
3209   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3210 
3211   /* propagate symmetry info to coarse matrix */
3212   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3213 
3214   /* Compute coarse null space (special handling by BDDC only) */
3215   if (pcbddc->NullSpace) {
3216     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3217     if (isbddc) {
3218       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3219     } else {
3220       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3221     }
3222   }
3223 
3224   /* set operators */
3225   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3226   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3227 
3228   /* additional KSP customization */
3229   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3230   if (max_it < 5) {
3231     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3232   }
3233   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3234 
3235 
3236   /* print some info if requested */
3237   if (pcbddc->dbg_flag) {
3238     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3239     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3240     if (!multilevel_allowed) {
3241       if (multilevel_requested) {
3242         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3243       } else if (pcbddc->max_levels) {
3244         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3245       }
3246     }
3247     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3248     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3249   }
3250 
3251   /* setup coarse ksp */
3252   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3253   if (pcbddc->dbg_flag) {
3254     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3255     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3256     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3257     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3258   }
3259 
3260   /* Check coarse problem if requested */
3261   if (pcbddc->dbg_flag) {
3262     KSP       check_ksp;
3263     KSPType   check_ksp_type;
3264     PC        check_pc;
3265     Vec       check_vec;
3266     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3267     PetscInt  its;
3268     PetscBool ispreonly,compute;
3269 
3270     /* Create ksp object suitable for estimation of extreme eigenvalues */
3271     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3272     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3273     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3274     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3275     if (ispreonly) {
3276       check_ksp_type = KSPPREONLY;
3277       compute = PETSC_FALSE;
3278     } else {
3279       if (issym) check_ksp_type = KSPCG;
3280       else check_ksp_type = KSPGMRES;
3281       compute = PETSC_TRUE;
3282     }
3283     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3284     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3285     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3286     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3287     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3288     /* create random vec */
3289     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3290     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3291     if (CoarseNullSpace) {
3292       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3293     }
3294     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3295     /* solve coarse problem */
3296     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3297     if (CoarseNullSpace) {
3298       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3299     }
3300     /* check coarse problem residual error */
3301     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3302     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3303     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3304     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3305     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3306     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3307     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3308     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3309     /* get eigenvalue estimation if preonly has not been requested */
3310     if (!ispreonly) {
3311       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3312       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3313       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3314     }
3315     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3316     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3317   }
3318   /* free memory */
3319   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3320   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3321   PetscFunctionReturn(0);
3322 }
3323 
3324 #undef __FUNCT__
3325 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3326 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3327 {
3328   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3329   PC_IS*         pcis = (PC_IS*)pc->data;
3330   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3331   PetscInt       i,coarse_size;
3332   PetscInt       *local_primal_indices;
3333   PetscErrorCode ierr;
3334 
3335   PetscFunctionBegin;
3336   /* Compute global number of coarse dofs */
3337   if (!pcbddc->primal_indices_local_idxs) {
3338     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
3339   }
3340   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3341 
3342   /* check numbering */
3343   if (pcbddc->dbg_flag) {
3344     PetscScalar coarsesum,*array;
3345 
3346     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3347     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3348     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3349     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3350     for (i=0;i<pcbddc->local_primal_size;i++) {
3351       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3352     }
3353     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3354     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3355     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3356     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3357     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3358     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3359     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3360     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3361     for (i=0;i<pcis->n;i++) {
3362       if (array[i] == 1.0) {
3363         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3364       }
3365     }
3366     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3367     for (i=0;i<pcis->n;i++) {
3368       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3369     }
3370     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3371     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3372     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3373     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3374     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3375     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3376     if (pcbddc->dbg_flag > 1) {
3377       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3378       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3379       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3380       for (i=0;i<pcbddc->local_primal_size;i++) {
3381         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3382       }
3383       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3384     }
3385     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3386   }
3387   /* get back data */
3388   *coarse_size_n = coarse_size;
3389   *local_primal_indices_n = local_primal_indices;
3390   PetscFunctionReturn(0);
3391 }
3392 
3393