xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 8e61c736f2eda2ed2716b02acca791a6eaf96098)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
166   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183 
184   /* Allocating some extra storage just to be safe */
185   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
186   for (i=0;i<pcis->n;i++) auxindices[i]=i;
187 
188   /* vertices in boundary numbering */
189   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
191   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
193   if (i != n_vertices) {
194     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
195   }
196 
197   /* Precompute stuffs needed for preprocessing and application of BDDC*/
198   if (n_constraints) {
199     /* work vectors */
200     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
201     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
202     /* auxiliary matrices */
203     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
204     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
205     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
206     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
207 
208     /* Extract constraints on R nodes: C_{CR}  */
209     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
210     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
211     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
212 
213     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
214     for (i=0;i<n_constraints;i++) {
215       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
216       /* Get row of constraint matrix in R numbering */
217       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
219       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
221       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
222       /* Solve for row of constraint matrix in R numbering */
223       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
224       /* Set values in local_auxmat2 */
225       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
227       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228     }
229     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
232 
233     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
234     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
235     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
236     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
237     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
238     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
239     ierr = MatSetUp(M1);CHKERRQ(ierr);
240     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
241     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
242     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
243     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
244     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
245     ierr = MatDestroy(&M2);CHKERRQ(ierr);
246     ierr = MatDestroy(&M3);CHKERRQ(ierr);
247     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
248     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
249   }
250 
251   /* Get submatrices from subdomain matrix */
252   if (n_vertices) {
253     PetscInt ibs,mbs;
254     PetscBool issbaij;
255     Mat newmat;
256 
257     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
258     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
259     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
260     if (ibs != mbs) { /* need to convert to SEQAIJ */
261       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
263       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
265       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
266     } else {
267       /* this is safe */
268       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
269       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
270       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
271         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272         /* which of the two approaches is faster? */
273         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
275         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
277         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
278       } else {
279         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
280         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
281       }
282     }
283     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
284     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
285     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
286   }
287 
288   /* Matrix of coarse basis functions (local) */
289   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
290   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
291   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
292   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
293   if (pcbddc->switch_static || pcbddc->dbg_flag) {
294     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
295     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
296     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
297     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
298   }
299 
300   if (pcbddc->dbg_flag) {
301     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
303     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
304   }
305   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
306   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
307 
308   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
309 
310   /* vertices */
311   for (i=0;i<n_vertices;i++) {
312     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
313     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
314     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
315     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
317     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
318     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
319     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
320     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
321     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
322     if (n_constraints) {
323       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
324       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
325       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
326     }
327     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
328     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
329 
330     /* Set values in coarse basis function and subdomain part of coarse_mat */
331     /* coarse basis functions */
332     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
333     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
335     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
337     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
339     if (pcbddc->switch_static || pcbddc->dbg_flag) {
340       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
341       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
342       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345     }
346     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
347     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
349     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     if (n_constraints) {
351       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354     }
355 
356     /* check */
357     if (pcbddc->dbg_flag) {
358       /* assemble subdomain vector on local nodes */
359       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
365       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
366       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
367       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
368       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
370       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       if (n_constraints) {
372         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
373         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
374         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
375       }
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
379       /* check saddle point solution */
380       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
381       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
382       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
383       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
384       /* shift by the identity matrix */
385       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
386       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
387       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   /* constraints */
393   for (i=0;i<n_constraints;i++) {
394     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
395     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
397     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
398     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
399     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
400     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
401     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
402     if (n_vertices) {
403       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
404     }
405     /* Set values in coarse basis function and subdomain part of coarse_mat */
406     /* coarse basis functions */
407     j = i+n_vertices; /* don't touch this! */
408     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
409     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
410     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
411     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     if (pcbddc->switch_static || pcbddc->dbg_flag) {
415       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420     }
421     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
422     if (n_vertices) {
423       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     }
427     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
428     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
429     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
430 
431     if (pcbddc->dbg_flag) {
432       /* assemble subdomain vector on nodes */
433       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
434       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
439       /* assemble subdomain vector of lagrange multipliers */
440       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
441       if (n_vertices) {
442         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
443         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
444         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
445       }
446       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
448       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
452       /* check saddle point solution */
453       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
454       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
455       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
456       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
457       /* shift by the identity matrix */
458       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
459       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
460       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
462     }
463   }
464   /* call assembling routines for local coarse basis */
465   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   if (pcbddc->switch_static || pcbddc->dbg_flag) {
468     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471 
472   /* compute other basis functions for non-symmetric problems */
473   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
474   if (!setsym || (setsym && !issym)) {
475     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
477     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
478     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
481       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
482       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
483       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
484     }
485     for (i=0;i<pcbddc->local_primal_size;i++) {
486       if (n_constraints) {
487         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
488         for (j=0;j<n_constraints;j++) {
489           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
490         }
491         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
492         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
493       }
494       if (i<n_vertices) {
495         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
496         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
497         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
498         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
499         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
500         if (n_constraints) {
501           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
502         }
503       } else {
504         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
505       }
506       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       if (i<n_vertices) {
514         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
515       }
516       if (pcbddc->switch_static || pcbddc->dbg_flag) {
517         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522       }
523 
524       if (pcbddc->dbg_flag) {
525         /* assemble subdomain vector on nodes */
526         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
527         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
529         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         if (i<n_vertices) {
531           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
532         }
533         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
534         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
535         /* assemble subdomain vector of lagrange multipliers */
536         for (j=0;j<pcbddc->local_primal_size;j++) {
537           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
538         }
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         /* check saddle point solution */
542         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
543         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
544         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
545         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
546         /* shift by the identity matrix */
547         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
548         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
549         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
550         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
551       }
552     }
553     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     if (pcbddc->switch_static || pcbddc->dbg_flag) {
556       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     }
559   }
560   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
561   /* Checking coarse_sub_mat and coarse basis functios */
562   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
563   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
564   if (pcbddc->dbg_flag) {
565     Mat         coarse_sub_mat;
566     Mat         AUXMAT,TM1,TM2,TM3,TM4;
567     Mat         coarse_phi_D,coarse_phi_B;
568     Mat         coarse_psi_D,coarse_psi_B;
569     Mat         A_II,A_BB,A_IB,A_BI;
570     MatType     checkmattype=MATSEQAIJ;
571     PetscReal   real_value;
572 
573     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
574     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
575     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
576     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
577     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
578     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
579     if (pcbddc->coarse_psi_B) {
580       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
581       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
582     }
583     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
584 
585     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
586     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
587     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
588     if (pcbddc->coarse_psi_B) {
589       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
593       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
594       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
595       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601     } else {
602       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
603       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
608       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
610     }
611     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
612     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
613     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
614     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
615     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
616     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
617     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
618     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
619     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
620     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
621     for (i=0;i<pcbddc->local_primal_size;i++) {
622       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
623     }
624     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
625     for (i=0;i<pcbddc->local_primal_size;i++) {
626       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
627     }
628     if (pcbddc->coarse_psi_B) {
629       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
630       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
631         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
632       }
633       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
634       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
635         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
636       }
637     }
638     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
639     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
640     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
641     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
643     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
644     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
645     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
646     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
647     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
648     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
649     if (pcbddc->coarse_psi_B) {
650       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
651       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
652     }
653     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
654     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
655     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
656     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
657   }
658   /* free memory */
659   if (n_vertices) {
660     ierr = PetscFree(vertices);CHKERRQ(ierr);
661     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
662     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
663     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
664     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
665     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
666   }
667   if (n_constraints) {
668     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
669     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
670     ierr = MatDestroy(&M1);CHKERRQ(ierr);
671     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
672   }
673   ierr = PetscFree(auxindices);CHKERRQ(ierr);
674   /* get back data */
675   *coarse_submat_vals_n = coarse_submat_vals;
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
681 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
682 {
683   PC_IS*            pcis = (PC_IS*)(pc->data);
684   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
685   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
686   PetscBool         issbaij,isbaij;
687   /* manage repeated solves */
688   MatReuse          reuse;
689   MatStructure      matstruct;
690   PetscErrorCode    ierr;
691 
692   PetscFunctionBegin;
693   /* get mat flags */
694   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
695   reuse = MAT_INITIAL_MATRIX;
696   if (pc->setupcalled) {
697     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
698     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
699     if (matstruct == SAME_NONZERO_PATTERN) {
700       reuse = MAT_REUSE_MATRIX;
701     } else {
702       reuse = MAT_INITIAL_MATRIX;
703     }
704   }
705   if (reuse == MAT_INITIAL_MATRIX) {
706     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
707     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
708     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
709     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
710     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
711   }
712 
713   /* transform local matrices if needed */
714   if (pcbddc->use_change_of_basis) {
715     Mat         change_mat_all;
716     PetscScalar *row_cmat_values;
717     PetscInt    *row_cmat_indices;
718     PetscInt    *nnz,*is_indices,*temp_indices;
719     PetscInt    i,j,k,n_D,n_B;
720 
721     /* Get Non-overlapping dimensions */
722     n_B = pcis->n_B;
723     n_D = pcis->n-n_B;
724 
725     /* compute nonzero structure of change of basis on all local nodes */
726     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
727     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
729     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
730     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
731     k=1;
732     for (i=0;i<n_B;i++) {
733       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
734       nnz[is_indices[i]]=j;
735       if (k < j) k = j;
736       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
737     }
738     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     /* assemble change of basis matrix on the whole set of local dofs */
740     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
741     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
742     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
743     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
744     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
745     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
746     for (i=0;i<n_D;i++) {
747       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
748     }
749     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
750     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
751     for (i=0;i<n_B;i++) {
752       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
753       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
754       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
755       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
756     }
757     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
758     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
759     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
760     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
761     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
762     if (!issbaij && !isbaij) {
763       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
764     } else {
765       Mat work_mat;
766       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
767       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
768       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
769     }
770     /*
771     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
772     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
773     */
774     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
775     ierr = PetscFree(nnz);CHKERRQ(ierr);
776     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
777   } else {
778     /* without change of basis, the local matrix is unchanged */
779     if (!pcbddc->local_mat) {
780       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
781       pcbddc->local_mat = matis->A;
782     }
783   }
784 
785   /* get submatrices */
786   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
787   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
788   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
789   if (!issbaij) {
790     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
791     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
792   } else {
793     Mat newmat;
794     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
795     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
796     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
797     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
804 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
805 {
806   PC_IS*         pcis = (PC_IS*)(pc->data);
807   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
808   IS             is_aux1,is_aux2;
809   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
810   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
811   PetscInt       vbs,bs;
812   PetscBT        bitmask;
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816   /* Set Non-overlapping dimensions */
817   n_B = pcis->n_B; n_D = pcis->n - n_B;
818   /* get vertex indices from constraint matrix */
819   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
820 
821   /* create auxiliary bitmask */
822   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
823   for (i=0;i<n_vertices;i++) {
824     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
825   }
826 
827   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
828   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
829   for (i=0, n_R=0; i<pcis->n; i++) {
830     if (!PetscBTLookup(bitmask,i)) {
831       idx_R_local[n_R] = i;
832       n_R++;
833     }
834   }
835 
836   /* Block code */
837   vbs = 1;
838   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
839   if (bs>1 && !(n_vertices%bs)) {
840     PetscBool is_blocked = PETSC_TRUE;
841     PetscInt  *vary;
842     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
843     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
844     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
845     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
846     for (i=0; i<n_vertices; i++) {
847       if (vary[i]!=0 && vary[i]!=bs) {
848         is_blocked = PETSC_FALSE;
849         break;
850       }
851     }
852     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
853       vbs = bs;
854       for (i=0;i<n_R/vbs;i++) {
855         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
856       }
857     }
858     ierr = PetscFree(vary);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(vertices);CHKERRQ(ierr);
861   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
862   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
863 
864   /* print some info if requested */
865   if (pcbddc->dbg_flag) {
866     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
867     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
868     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
869     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
870     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
871     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
872     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
873   }
874 
875   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
876   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
877   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
878   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
879   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
880   for (i=0; i<n_D; i++) {
881     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
882   }
883   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
884   for (i=0, j=0; i<n_R; i++) {
885     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
886       aux_array1[j++] = i;
887     }
888   }
889   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
890   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
891   for (i=0, j=0; i<n_B; i++) {
892     if (!PetscBTLookup(bitmask,is_indices[i])) {
893       aux_array2[j++] = i;
894     }
895   }
896   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
897   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
898   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
899   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
900   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
901 
902   if (pcbddc->switch_static || pcbddc->dbg_flag) {
903     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
904     for (i=0, j=0; i<n_R; i++) {
905       if (PetscBTLookup(bitmask,idx_R_local[i])) {
906         aux_array1[j++] = i;
907       }
908     }
909     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
910     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
911     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
912   }
913   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
914   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
921 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
922 {
923   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
924   PC_IS          *pcis = (PC_IS*)pc->data;
925   PC             pc_temp;
926   Mat            A_RR;
927   Vec            vec1,vec2,vec3;
928   MatStructure   matstruct;
929   PetscScalar    m_one = -1.0;
930   PetscReal      value;
931   PetscInt       n_D,n_R,ibs,mbs;
932   PetscBool      use_exact,use_exact_reduced,issbaij;
933   PetscErrorCode ierr;
934   /* prefixes stuff */
935   char           dir_prefix[256],neu_prefix[256],str_level[3];
936   size_t         len;
937 
938   PetscFunctionBegin;
939   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
940 
941   /* compute prefixes */
942   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
943   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
944   if (!pcbddc->current_level) {
945     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
946     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
947     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
948     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
949   } else {
950     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
951     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
952     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
953     len -= 15; /* remove "pc_bddc_coarse_" */
954     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
955     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
956     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
957     *(dir_prefix+len)='\0';
958     *(neu_prefix+len)='\0';
959     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
960     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
961     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
962     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
963   }
964 
965   /* DIRICHLET PROBLEM */
966   /* Matrix for Dirichlet problem is pcis->A_II */
967   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
968   if (!pcbddc->ksp_D) { /* create object if not yet build */
969     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
970     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
971     /* default */
972     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
973     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
974     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
975     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
976     if (issbaij) {
977       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
978     } else {
979       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
980     }
981     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
982   }
983   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
984   /* Allow user's customization */
985   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
986   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
987   if (!n_D) {
988     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
989     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
990   }
991   /* Set Up KSP for Dirichlet problem of BDDC */
992   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
993   /* set ksp_D into pcis data */
994   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
995   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
996   pcis->ksp_D = pcbddc->ksp_D;
997 
998   /* NEUMANN PROBLEM */
999   /* Matrix for Neumann problem is A_RR -> we need to create it */
1000   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1001   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1002   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1003   if (ibs != mbs) {
1004     Mat newmat;
1005     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1006     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1007     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1008   } else {
1009     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1010   }
1011   if (!pcbddc->ksp_R) { /* create object if not yet build */
1012     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1013     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1014     /* default */
1015     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1016     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1017     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1018     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1019     if (issbaij) {
1020       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1021     } else {
1022       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1023     }
1024     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1025   }
1026   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1027   /* Allow user's customization */
1028   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1029   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1030   if (!n_R) {
1031     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1032     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1033   }
1034   /* Set Up KSP for Neumann problem of BDDC */
1035   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1036 
1037   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1038 
1039   /* Dirichlet */
1040   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1041   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1042   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1043   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1044   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1045   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1046   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1047   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1048   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1049   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1050   /* need to be adapted? */
1051   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1052   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1053   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1054   /* print info */
1055   if (pcbddc->dbg_flag) {
1056     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1057     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1058     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1059     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1060     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1061   }
1062   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1063     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1064   }
1065 
1066   /* Neumann */
1067   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1068   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1069   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1070   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1071   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1072   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1073   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1074   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1075   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1076   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1077   /* need to be adapted? */
1078   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1079   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1080   /* print info */
1081   if (pcbddc->dbg_flag) {
1082     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1083     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1084   }
1085   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1086     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1087   }
1088   /* free Neumann problem's matrix */
1089   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1095 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1096 {
1097   PetscErrorCode ierr;
1098   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1099 
1100   PetscFunctionBegin;
1101   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1102   if (pcbddc->local_auxmat1) {
1103     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1104     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1111 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1112 {
1113   PetscErrorCode ierr;
1114   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1115   PC_IS*            pcis = (PC_IS*)  (pc->data);
1116   const PetscScalar zero = 0.0;
1117 
1118   PetscFunctionBegin;
1119   /* Application of PHI^T (or PSI^T)  */
1120   if (pcbddc->coarse_psi_B) {
1121     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1122     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1123   } else {
1124     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1125     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1126   }
1127   /* Scatter data of coarse_rhs */
1128   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1129   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1130 
1131   /* Local solution on R nodes */
1132   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1133   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1135   if (pcbddc->switch_static) {
1136     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   }
1139   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1140   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1141   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   if (pcbddc->switch_static) {
1144     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1146   }
1147 
1148   /* Coarse solution */
1149   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1151     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1152   }
1153   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1154   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155 
1156   /* Sum contributions from two levels */
1157   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1158   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1164 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1165 {
1166   PetscErrorCode ierr;
1167   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1168 
1169   PetscFunctionBegin;
1170   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1176 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1177 {
1178   PetscErrorCode ierr;
1179   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1180 
1181   PetscFunctionBegin;
1182   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 /* uncomment for testing purposes */
1187 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1188 #undef __FUNCT__
1189 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1190 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1191 {
1192   PetscErrorCode    ierr;
1193   PC_IS*            pcis = (PC_IS*)(pc->data);
1194   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1195   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1196   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1197   MatType           impMatType=MATSEQAIJ;
1198   /* one and zero */
1199   PetscScalar       one=1.0,zero=0.0;
1200   /* space to store constraints and their local indices */
1201   PetscScalar       *temp_quadrature_constraint;
1202   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1203   /* iterators */
1204   PetscInt          i,j,k,total_counts,temp_start_ptr;
1205   /* stuff to store connected components stored in pcbddc->mat_graph */
1206   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1207   PetscInt          n_ISForFaces,n_ISForEdges;
1208   /* near null space stuff */
1209   MatNullSpace      nearnullsp;
1210   const Vec         *nearnullvecs;
1211   Vec               *localnearnullsp;
1212   PetscBool         nnsp_has_cnst;
1213   PetscInt          nnsp_size;
1214   PetscScalar       *array;
1215   /* BLAS integers */
1216   PetscBLASInt      lwork,lierr;
1217   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1218   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1219   /* LAPACK working arrays for SVD or POD */
1220   PetscBool         skip_lapack;
1221   PetscScalar       *work;
1222   PetscReal         *singular_vals;
1223 #if defined(PETSC_USE_COMPLEX)
1224   PetscReal         *rwork;
1225 #endif
1226 #if defined(PETSC_MISSING_LAPACK_GESVD)
1227   PetscBLASInt      Blas_one_2=1;
1228   PetscScalar       *temp_basis,*correlation_mat;
1229 #else
1230   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1231   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1232 #endif
1233   /* change of basis */
1234   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1235   PetscBool         boolforchange,qr_needed;
1236   PetscBT           touched,change_basis,qr_needed_idx;
1237   /* auxiliary stuff */
1238   PetscInt          *nnz,*is_indices,*local_to_B;
1239   /* some quantities */
1240   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1241   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1242 
1243 
1244   PetscFunctionBegin;
1245   /* Destroy Mat objects computed previously */
1246   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1247   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1248   /* Get index sets for faces, edges and vertices from graph */
1249   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1250     pcbddc->use_vertices = PETSC_TRUE;
1251   }
1252   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1253   /* HACK: provide functions to set change of basis */
1254   if (!ISForVertices && pcbddc->NullSpace) {
1255     pcbddc->use_change_of_basis = PETSC_TRUE;
1256     pcbddc->use_change_on_faces = PETSC_FALSE;
1257   }
1258   /* print some info */
1259   if (pcbddc->dbg_flag) {
1260     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1261     i = 0;
1262     if (ISForVertices) {
1263       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1264     }
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1266     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1267     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1268     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1269   }
1270   /* check if near null space is attached to global mat */
1271   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1272   if (nearnullsp) {
1273     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1274   } else { /* if near null space is not provided BDDC uses constants by default */
1275     nnsp_size = 0;
1276     nnsp_has_cnst = PETSC_TRUE;
1277   }
1278   /* get max number of constraints on a single cc */
1279   max_constraints = nnsp_size;
1280   if (nnsp_has_cnst) max_constraints++;
1281 
1282   /*
1283        Evaluate maximum storage size needed by the procedure
1284        - temp_indices will contain start index of each constraint stored as follows
1285        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1286        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1287        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1288                                                                                                                                                          */
1289   total_counts = n_ISForFaces+n_ISForEdges;
1290   total_counts *= max_constraints;
1291   n_vertices = 0;
1292   if (ISForVertices) {
1293     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1294   }
1295   total_counts += n_vertices;
1296   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1297   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1298   total_counts = 0;
1299   max_size_of_constraint = 0;
1300   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1301     if (i<n_ISForEdges) {
1302       used_IS = &ISForEdges[i];
1303     } else {
1304       used_IS = &ISForFaces[i-n_ISForEdges];
1305     }
1306     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1307     total_counts += j;
1308     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1309   }
1310   total_counts *= max_constraints;
1311   total_counts += n_vertices;
1312   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1313   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1314   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1315   /* local to boundary numbering */
1316   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1317   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1318   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1319   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1320   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1321   /* get local part of global near null space vectors */
1322   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1323   for (k=0;k<nnsp_size;k++) {
1324     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1325     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1326     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327   }
1328 
1329   /* whether or not to skip lapack calls */
1330   skip_lapack = PETSC_TRUE;
1331   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1332 
1333   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1334   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1335     PetscScalar temp_work;
1336 #if defined(PETSC_MISSING_LAPACK_GESVD)
1337     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1338     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1339     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1340     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1341 #if defined(PETSC_USE_COMPLEX)
1342     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1343 #endif
1344     /* now we evaluate the optimal workspace using query with lwork=-1 */
1345     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1346     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1347     lwork = -1;
1348     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1349 #if !defined(PETSC_USE_COMPLEX)
1350     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1351 #else
1352     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1353 #endif
1354     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1355     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1356 #else /* on missing GESVD */
1357     /* SVD */
1358     PetscInt max_n,min_n;
1359     max_n = max_size_of_constraint;
1360     min_n = max_constraints;
1361     if (max_size_of_constraint < max_constraints) {
1362       min_n = max_size_of_constraint;
1363       max_n = max_constraints;
1364     }
1365     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1366 #if defined(PETSC_USE_COMPLEX)
1367     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1368 #endif
1369     /* now we evaluate the optimal workspace using query with lwork=-1 */
1370     lwork = -1;
1371     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1372     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1373     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1374     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1375 #if !defined(PETSC_USE_COMPLEX)
1376     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1377 #else
1378     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1379 #endif
1380     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1381     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1382 #endif /* on missing GESVD */
1383     /* Allocate optimal workspace */
1384     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1385     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1386   }
1387   /* Now we can loop on constraining sets */
1388   total_counts = 0;
1389   temp_indices[0] = 0;
1390   /* vertices */
1391   if (ISForVertices) {
1392     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1393     if (nnsp_has_cnst) { /* consider all vertices */
1394       for (i=0;i<n_vertices;i++) {
1395         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1396         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1397         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1398         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1399         total_counts++;
1400       }
1401     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1402       PetscBool used_vertex;
1403       for (i=0;i<n_vertices;i++) {
1404         used_vertex = PETSC_FALSE;
1405         k = 0;
1406         while (!used_vertex && k<nnsp_size) {
1407           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1408           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1409             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1410             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1411             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1412             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1413             total_counts++;
1414             used_vertex = PETSC_TRUE;
1415           }
1416           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1417           k++;
1418         }
1419       }
1420     }
1421     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1422     n_vertices = total_counts;
1423   }
1424 
1425   /* edges and faces */
1426   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1427     if (i<n_ISForEdges) {
1428       used_IS = &ISForEdges[i];
1429       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1430     } else {
1431       used_IS = &ISForFaces[i-n_ISForEdges];
1432       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1433     }
1434     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1435     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1436     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1437     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1438     /* change of basis should not be performed on local periodic nodes */
1439     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1440     if (nnsp_has_cnst) {
1441       PetscScalar quad_value;
1442       temp_constraints++;
1443       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1444       for (j=0;j<size_of_constraint;j++) {
1445         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1446         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1447         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1448       }
1449       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1450       total_counts++;
1451     }
1452     for (k=0;k<nnsp_size;k++) {
1453       PetscReal real_value;
1454       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1455       for (j=0;j<size_of_constraint;j++) {
1456         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1457         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1458         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1459       }
1460       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1461       /* check if array is null on the connected component */
1462       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1463       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1464       if (real_value > 0.0) { /* keep indices and values */
1465         temp_constraints++;
1466         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1467         total_counts++;
1468       }
1469     }
1470     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1471     valid_constraints = temp_constraints;
1472     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1473     if (!pcbddc->use_nnsp_true && temp_constraints) {
1474       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1475 
1476 #if defined(PETSC_MISSING_LAPACK_GESVD)
1477       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1478          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1479          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1480             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1481             from that computed using LAPACKgesvd
1482          -> This is due to a different computation of eigenvectors in LAPACKheev
1483          -> The quality of the POD-computed basis will be the same */
1484       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1485       /* Store upper triangular part of correlation matrix */
1486       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1487       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1488       for (j=0;j<temp_constraints;j++) {
1489         for (k=0;k<j+1;k++) {
1490           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1491         }
1492       }
1493       /* compute eigenvalues and eigenvectors of correlation matrix */
1494       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1495       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1496 #if !defined(PETSC_USE_COMPLEX)
1497       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1498 #else
1499       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1500 #endif
1501       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1502       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1503       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1504       j=0;
1505       while (j < temp_constraints && singular_vals[j] < tol) j++;
1506       total_counts=total_counts-j;
1507       valid_constraints = temp_constraints-j;
1508       /* scale and copy POD basis into used quadrature memory */
1509       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1510       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1511       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1512       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1513       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1514       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1515       if (j<temp_constraints) {
1516         PetscInt ii;
1517         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1518         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1519         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1520         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1521         for (k=0;k<temp_constraints-j;k++) {
1522           for (ii=0;ii<size_of_constraint;ii++) {
1523             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1524           }
1525         }
1526       }
1527 #else  /* on missing GESVD */
1528       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1529       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1530       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1531       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1532 #if !defined(PETSC_USE_COMPLEX)
1533       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1534 #else
1535       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1536 #endif
1537       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1538       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1539       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1540       k = temp_constraints;
1541       if (k > size_of_constraint) k = size_of_constraint;
1542       j = 0;
1543       while (j < k && singular_vals[k-j-1] < tol) j++;
1544       total_counts = total_counts-temp_constraints+k-j;
1545       valid_constraints = k-j;
1546 #endif /* on missing GESVD */
1547     }
1548     /* setting change_of_basis flag is safe now */
1549     if (boolforchange) {
1550       for (j=0;j<valid_constraints;j++) {
1551         PetscBTSet(change_basis,total_counts-j-1);
1552       }
1553     }
1554   }
1555   /* free index sets of faces, edges and vertices */
1556   for (i=0;i<n_ISForFaces;i++) {
1557     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1558   }
1559   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1560   for (i=0;i<n_ISForEdges;i++) {
1561     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1562   }
1563   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1564   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1565 
1566   /* free workspace */
1567   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1568     ierr = PetscFree(work);CHKERRQ(ierr);
1569 #if defined(PETSC_USE_COMPLEX)
1570     ierr = PetscFree(rwork);CHKERRQ(ierr);
1571 #endif
1572     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1573 #if defined(PETSC_MISSING_LAPACK_GESVD)
1574     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1575     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1576 #endif
1577   }
1578   for (k=0;k<nnsp_size;k++) {
1579     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1580   }
1581   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1582 
1583   /* set quantities in pcbddc data structure */
1584   /* n_vertices defines the number of subdomain corners in the primal space */
1585   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1586   pcbddc->local_primal_size = total_counts;
1587   pcbddc->n_vertices = n_vertices;
1588   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1589 
1590   /* Create constraint matrix */
1591   /* The constraint matrix is used to compute the l2g map of primal dofs */
1592   /* so we need to set it up properly either with or without change of basis */
1593   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1594   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1595   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1596   /* array to compute a local numbering of constraints : vertices first then constraints */
1597   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1598   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1599   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1600   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1601   /* auxiliary stuff for basis change */
1602   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1603   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1604 
1605   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1606   total_primal_vertices=0;
1607   for (i=0;i<pcbddc->local_primal_size;i++) {
1608     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1609     if (size_of_constraint == 1) {
1610       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1611       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1612       aux_primal_minloc[total_primal_vertices]=0;
1613       total_primal_vertices++;
1614     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1615       PetscInt min_loc,min_index;
1616       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1617       /* find first untouched local node */
1618       k = 0;
1619       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1620       min_index = global_indices[k];
1621       min_loc = k;
1622       /* search the minimum among global nodes already untouched on the cc */
1623       for (k=1;k<size_of_constraint;k++) {
1624         /* there can be more than one constraint on a single connected component */
1625         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1626           min_index = global_indices[k];
1627           min_loc = k;
1628         }
1629       }
1630       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1631       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1632       aux_primal_minloc[total_primal_vertices]=min_loc;
1633       total_primal_vertices++;
1634     }
1635   }
1636   /* determine if a QR strategy is needed for change of basis */
1637   qr_needed = PETSC_FALSE;
1638   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1639   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1640     if (PetscBTLookup(change_basis,i)) {
1641       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1642       j = 0;
1643       for (k=0;k<size_of_constraint;k++) {
1644         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1645           j++;
1646         }
1647       }
1648       /* found more than one primal dof on the cc */
1649       if (j > 1) {
1650         PetscBTSet(qr_needed_idx,i);
1651         qr_needed = PETSC_TRUE;
1652       }
1653     }
1654   }
1655   /* free workspace */
1656   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1657 
1658   /* permute indices in order to have a sorted set of vertices */
1659   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1660 
1661   /* nonzero structure of constraint matrix */
1662   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1663   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1664   j=total_primal_vertices;
1665   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1666     if (!PetscBTLookup(change_basis,i)) {
1667       nnz[j]=temp_indices[i+1]-temp_indices[i];
1668       j++;
1669     }
1670   }
1671   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1672   ierr = PetscFree(nnz);CHKERRQ(ierr);
1673   /* set values in constraint matrix */
1674   for (i=0;i<total_primal_vertices;i++) {
1675     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1676   }
1677   total_counts = total_primal_vertices;
1678   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1679     if (!PetscBTLookup(change_basis,i)) {
1680       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1681       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1682       total_counts++;
1683     }
1684   }
1685   /* assembling */
1686   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688   /*
1689   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1690   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1691   */
1692   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1693   if (pcbddc->use_change_of_basis) {
1694     /* dual and primal dofs on a single cc */
1695     PetscInt     dual_dofs,primal_dofs;
1696     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1697     PetscInt     primal_counter;
1698     /* working stuff for GEQRF */
1699     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1700     PetscBLASInt lqr_work;
1701     /* working stuff for UNGQR */
1702     PetscScalar  *gqr_work,lgqr_work_t;
1703     PetscBLASInt lgqr_work;
1704     /* working stuff for TRTRS */
1705     PetscScalar  *trs_rhs;
1706     PetscBLASInt Blas_NRHS;
1707     /* pointers for values insertion into change of basis matrix */
1708     PetscInt     *start_rows,*start_cols;
1709     PetscScalar  *start_vals;
1710     /* working stuff for values insertion */
1711     PetscBT      is_primal;
1712 
1713     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1714     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1715     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1716     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1717     /* work arrays */
1718     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1719     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1720     /* nonzeros per row */
1721     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1722       if (PetscBTLookup(change_basis,i)) {
1723         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1724         if (PetscBTLookup(qr_needed_idx,i)) {
1725           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1726         } else {
1727           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1728           /* get local primal index on the cc */
1729           j = 0;
1730           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1731           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1732         }
1733       }
1734     }
1735     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1736     ierr = PetscFree(nnz);CHKERRQ(ierr);
1737     /* Set initial identity in the matrix */
1738     for (i=0;i<pcis->n_B;i++) {
1739       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1740     }
1741 
1742     if (pcbddc->dbg_flag) {
1743       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1744       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1745     }
1746 
1747 
1748     /* Now we loop on the constraints which need a change of basis */
1749     /*
1750        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1751        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1752 
1753        Basic blocks of change of basis matrix T computed
1754 
1755           - Using the following block transformation if there is only a primal dof on the cc
1756             (in the example, primal dof is the last one of the edge in LOCAL ordering
1757              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1758             | 1        0   ...        0     1 |
1759             | 0        1   ...        0     1 |
1760             |              ...                |
1761             | 0        ...            1     1 |
1762             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1763 
1764           - via QR decomposition of constraints otherwise
1765     */
1766     if (qr_needed) {
1767       /* space to store Q */
1768       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1769       /* first we issue queries for optimal work */
1770       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1771       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1772       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1773       lqr_work = -1;
1774       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1775       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1776       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1777       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1778       lgqr_work = -1;
1779       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1780       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1781       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1782       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1783       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1784       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1785       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1786       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1787       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1788       /* array to store scaling factors for reflectors */
1789       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1790       /* array to store rhs and solution of triangular solver */
1791       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1792       /* allocating workspace for check */
1793       if (pcbddc->dbg_flag) {
1794         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1795       }
1796     }
1797     /* array to store whether a node is primal or not */
1798     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1799     for (i=0;i<total_primal_vertices;i++) {
1800       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1801     }
1802 
1803     /* loop on constraints and see whether or not they need a change of basis and compute it */
1804     /* -> using implicit ordering contained in temp_indices data */
1805     total_counts = pcbddc->n_vertices;
1806     primal_counter = total_counts;
1807     while (total_counts<pcbddc->local_primal_size) {
1808       primal_dofs = 1;
1809       if (PetscBTLookup(change_basis,total_counts)) {
1810         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1811         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1812           primal_dofs++;
1813         }
1814         /* get constraint info */
1815         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1816         dual_dofs = size_of_constraint-primal_dofs;
1817 
1818         if (pcbddc->dbg_flag) {
1819           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1820         }
1821 
1822         if (primal_dofs > 1) { /* QR */
1823 
1824           /* copy quadrature constraints for change of basis check */
1825           if (pcbddc->dbg_flag) {
1826             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1827           }
1828           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1829           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1830 
1831           /* compute QR decomposition of constraints */
1832           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1833           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1834           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1835           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1836           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1837           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1838           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1839 
1840           /* explictly compute R^-T */
1841           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1842           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1843           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1844           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1845           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1846           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1847           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1848           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1849           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1850           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1851 
1852           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1853           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1854           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1855           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1856           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1857           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1858           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1859           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1860           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1861 
1862           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1863              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1864              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1865           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1866           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1868           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1869           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1870           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1871           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1872           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1873           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1874           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1875 
1876           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1877           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1878           /* insert cols for primal dofs */
1879           for (j=0;j<primal_dofs;j++) {
1880             start_vals = &qr_basis[j*size_of_constraint];
1881             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1882             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1883           }
1884           /* insert cols for dual dofs */
1885           for (j=0,k=0;j<dual_dofs;k++) {
1886             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1887               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1888               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1889               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1890               j++;
1891             }
1892           }
1893 
1894           /* check change of basis */
1895           if (pcbddc->dbg_flag) {
1896             PetscInt   ii,jj;
1897             PetscBool valid_qr=PETSC_TRUE;
1898             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1899             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1900             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1901             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1902             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1903             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1904             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1905             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1906             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1907             for (jj=0;jj<size_of_constraint;jj++) {
1908               for (ii=0;ii<primal_dofs;ii++) {
1909                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1910                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1911               }
1912             }
1913             if (!valid_qr) {
1914               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1915               for (jj=0;jj<size_of_constraint;jj++) {
1916                 for (ii=0;ii<primal_dofs;ii++) {
1917                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1918                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1919                   }
1920                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1921                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1922                   }
1923                 }
1924               }
1925             } else {
1926               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1927             }
1928           }
1929         } else { /* simple transformation block */
1930           PetscInt row,col;
1931           PetscScalar val;
1932           for (j=0;j<size_of_constraint;j++) {
1933             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1934             if (!PetscBTLookup(is_primal,row)) {
1935               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1936               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1937               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1938             } else {
1939               for (k=0;k<size_of_constraint;k++) {
1940                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1941                 if (row != col) {
1942                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1943                 } else {
1944                   val = 1.0;
1945                 }
1946                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1947               }
1948             }
1949           }
1950         }
1951         /* increment primal counter */
1952         primal_counter += primal_dofs;
1953       } else {
1954         if (pcbddc->dbg_flag) {
1955           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1956         }
1957       }
1958       /* increment constraint counter total_counts */
1959       total_counts += primal_dofs;
1960     }
1961 
1962     /* free workspace */
1963     if (qr_needed) {
1964       if (pcbddc->dbg_flag) {
1965         ierr = PetscFree(work);CHKERRQ(ierr);
1966       }
1967       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1968       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1969       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1970       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1971       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1972     }
1973     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1974     /* assembling */
1975     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1976     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1977     /*
1978     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1979     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1980     */
1981   }
1982 
1983   /* flush dbg viewer */
1984   if (pcbddc->dbg_flag) {
1985     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1986   }
1987 
1988   /* free workspace */
1989   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1990   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1991   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1992   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1993   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1994   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1995   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1996   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1997   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1998   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2004 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2005 {
2006   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2007   PC_IS       *pcis = (PC_IS*)pc->data;
2008   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2009   PetscInt    bs,ierr,i,vertex_size;
2010   PetscViewer viewer=pcbddc->dbg_viewer;
2011 
2012   PetscFunctionBegin;
2013   /* Reset previously computed graph */
2014   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2015   /* Init local Graph struct */
2016   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2017 
2018   /* Check validity of the csr graph passed in by the user */
2019   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2020     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2021   }
2022 
2023   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2024   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2025     Mat mat_adj;
2026     const PetscInt *xadj,*adjncy;
2027     PetscBool flg_row=PETSC_TRUE;
2028 
2029     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2030     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2031     if (!flg_row) {
2032       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2033     }
2034     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2035     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2036     if (!flg_row) {
2037       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2038     }
2039     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2040   }
2041 
2042   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2043   vertex_size = 1;
2044   if (!pcbddc->user_provided_isfordofs) {
2045     if (!pcbddc->n_ISForDofs) {
2046       IS *custom_ISForDofs;
2047 
2048       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2049       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2050       for (i=0;i<bs;i++) {
2051         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2052       }
2053       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2054       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2055       /* remove my references to IS objects */
2056       for (i=0;i<bs;i++) {
2057         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2058       }
2059       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2060     }
2061   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2062     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2063   }
2064 
2065   /* Setup of Graph */
2066   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2067 
2068   /* Graph's connected components analysis */
2069   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2070 
2071   /* print some info to stdout */
2072   if (pcbddc->dbg_flag) {
2073     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2080 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2081 {
2082   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2083   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2084   PetscErrorCode ierr;
2085 
2086   PetscFunctionBegin;
2087   n = 0;
2088   vertices = 0;
2089   if (pcbddc->ConstraintMatrix) {
2090     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2091     for (i=0;i<local_primal_size;i++) {
2092       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2093       if (size_of_constraint == 1) n++;
2094       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2095     }
2096     if (vertices_idx) {
2097       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2098       n = 0;
2099       for (i=0;i<local_primal_size;i++) {
2100         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2101         if (size_of_constraint == 1) {
2102           vertices[n++]=row_cmat_indices[0];
2103         }
2104         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2105       }
2106     }
2107   }
2108   *n_vertices = n;
2109   if (vertices_idx) *vertices_idx = vertices;
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2115 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2116 {
2117   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2118   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2119   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2120   PetscBT        touched;
2121   PetscErrorCode ierr;
2122 
2123     /* This function assumes that the number of local constraints per connected component
2124        is not greater than the number of nodes defined for the connected component
2125        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2126   PetscFunctionBegin;
2127   n = 0;
2128   constraints_index = 0;
2129   if (pcbddc->ConstraintMatrix) {
2130     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2131     max_size_of_constraint = 0;
2132     for (i=0;i<local_primal_size;i++) {
2133       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2134       if (size_of_constraint > 1) {
2135         n++;
2136       }
2137       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2138       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2139     }
2140     if (constraints_idx) {
2141       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2142       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2143       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2144       n = 0;
2145       for (i=0;i<local_primal_size;i++) {
2146         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2147         if (size_of_constraint > 1) {
2148           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2149           /* find first untouched local node */
2150           j = 0;
2151           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2152           min_index = row_cmat_global_indices[j];
2153           min_loc = j;
2154           /* search the minimum among nodes not yet touched on the connected component
2155              since there can be more than one constraint on a single cc */
2156           for (j=1;j<size_of_constraint;j++) {
2157             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2158               min_index = row_cmat_global_indices[j];
2159               min_loc = j;
2160             }
2161           }
2162           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2163           constraints_index[n++] = row_cmat_indices[min_loc];
2164         }
2165         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2166       }
2167       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2168       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2169     }
2170   }
2171   *n_constraints = n;
2172   if (constraints_idx) *constraints_idx = constraints_index;
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 /* the next two functions has been adapted from pcis.c */
2177 #undef __FUNCT__
2178 #define __FUNCT__ "PCBDDCApplySchur"
2179 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2180 {
2181   PetscErrorCode ierr;
2182   PC_IS          *pcis = (PC_IS*)(pc->data);
2183 
2184   PetscFunctionBegin;
2185   if (!vec2_B) { vec2_B = v; }
2186   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2187   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2188   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2189   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2190   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2196 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2197 {
2198   PetscErrorCode ierr;
2199   PC_IS          *pcis = (PC_IS*)(pc->data);
2200 
2201   PetscFunctionBegin;
2202   if (!vec2_B) { vec2_B = v; }
2203   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2204   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2205   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2206   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2207   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "PCBDDCSubsetNumbering"
2213 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2214 {
2215   Vec            local_vec,global_vec;
2216   IS             seqis,paris;
2217   VecScatter     scatter_ctx;
2218   PetscScalar    *array;
2219   PetscInt       *temp_global_dofs;
2220   PetscScalar    globalsum;
2221   PetscInt       i,j,s;
2222   PetscInt       nlocals,first_index,old_index,max_local;
2223   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2224   PetscMPIInt    *dof_sizes,*dof_displs;
2225   PetscBool      first_found;
2226   PetscErrorCode ierr;
2227 
2228   PetscFunctionBegin;
2229   /* mpi buffers */
2230   MPI_Comm_size(comm,&size_prec_comm);
2231   MPI_Comm_rank(comm,&rank_prec_comm);
2232   j = ( !rank_prec_comm ? size_prec_comm : 0);
2233   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2234   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2235   /* get maximum size of subset */
2236   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2237   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2238   max_local = 0;
2239   if (n_local_dofs) {
2240     max_local = temp_global_dofs[0];
2241     for (i=1;i<n_local_dofs;i++) {
2242       if (max_local < temp_global_dofs[i] ) {
2243         max_local = temp_global_dofs[i];
2244       }
2245     }
2246   }
2247   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2248   max_global++;
2249   max_local = 0;
2250   if (n_local_dofs) {
2251     max_local = local_dofs[0];
2252     for (i=1;i<n_local_dofs;i++) {
2253       if (max_local < local_dofs[i] ) {
2254         max_local = local_dofs[i];
2255       }
2256     }
2257   }
2258   max_local++;
2259   /* allocate workspace */
2260   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2261   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2262   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2263   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2264   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2265   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2266   /* create scatter */
2267   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2268   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2269   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2270   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2271   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2272   /* init array */
2273   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2274   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2275   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2276   if (local_dofs_mult) {
2277     for (i=0;i<n_local_dofs;i++) {
2278       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2279     }
2280   } else {
2281     for (i=0;i<n_local_dofs;i++) {
2282       array[local_dofs[i]]=1.0;
2283     }
2284   }
2285   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2286   /* scatter into global vec and get total number of global dofs */
2287   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2288   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2289   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2290   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2291   /* Fill global_vec with cumulative function for global numbering */
2292   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2293   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2294   nlocals = 0;
2295   first_index = -1;
2296   first_found = PETSC_FALSE;
2297   for (i=0;i<s;i++) {
2298     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2299       first_found = PETSC_TRUE;
2300       first_index = i;
2301     }
2302     nlocals += (PetscInt)PetscRealPart(array[i]);
2303   }
2304   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2305   if (!rank_prec_comm) {
2306     dof_displs[0]=0;
2307     for (i=1;i<size_prec_comm;i++) {
2308       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2309     }
2310   }
2311   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2312   if (first_found) {
2313     array[first_index] += (PetscScalar)nlocals;
2314     old_index = first_index;
2315     for (i=first_index+1;i<s;i++) {
2316       if (PetscRealPart(array[i]) > 0.0) {
2317         array[i] += array[old_index];
2318         old_index = i;
2319       }
2320     }
2321   }
2322   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2323   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2324   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2325   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2326   /* get global ordering of local dofs */
2327   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2328   if (local_dofs_mult) {
2329     for (i=0;i<n_local_dofs;i++) {
2330       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2331     }
2332   } else {
2333     for (i=0;i<n_local_dofs;i++) {
2334       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2335     }
2336   }
2337   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2338   /* free workspace */
2339   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2340   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2341   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2342   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2343   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2344   /* return pointer to global ordering of local dofs */
2345   *global_numbering_subset = temp_global_dofs;
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2351 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2352 {
2353   PetscInt       i,j;
2354   PetscScalar    *alphas;
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   /* this implements stabilized Gram-Schmidt */
2359   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2360   for (i=0;i<n;i++) {
2361     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2362     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2363     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2364   }
2365   ierr = PetscFree(alphas);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatConvert_IS_AIJ"
2372 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2373 {
2374   Mat new_mat;
2375   Mat_IS *matis = (Mat_IS*)(mat->data);
2376   /* info on mat */
2377   PetscInt bs,rows,cols;
2378   PetscInt lrows,lcols;
2379   PetscInt local_rows,local_cols;
2380   PetscMPIInt nsubdomains;
2381   /* preallocation */
2382   Vec vec_dnz,vec_onz;
2383   PetscScalar *my_dnz,*my_onz,*array;
2384   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2385   PetscInt  index_row,index_col,owner;
2386   PetscInt  *local_indices,*global_indices;
2387   /* work */
2388   PetscInt i,j;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   /* CHECKS */
2393   /* get info from mat */
2394   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2395   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2396   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2397   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2398 
2399   /* MAT_INITIAL_MATRIX */
2400   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2401   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2402   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2403   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2404   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2405   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2406 
2407   /*
2408     preallocation
2409   */
2410 
2411   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2412   /*
2413      Some vectors are needed to sum up properly on shared interface dofs.
2414      Note that preallocation is not exact, since it overestimates nonzeros
2415   */
2416 /*
2417   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2418   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2419   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2420   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2421 */
2422   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2423   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2424   /* All processes need to compute entire row ownership */
2425   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2426   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2427   for (i=0;i<nsubdomains;i++) {
2428     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2429       row_ownership[j]=i;
2430     }
2431   }
2432   /* map indices of local mat to global values */
2433   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2434   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2435   for (i=0;i<local_rows;i++) local_indices[i]=i;
2436   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2437   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2438 
2439   /*
2440      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2441      then, they will be summed up properly. This way, preallocation is always sufficient
2442   */
2443   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2444   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2445   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2446   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2447   for (i=0;i<local_rows;i++) {
2448     index_row = global_indices[i];
2449     for (j=i;j<local_rows;j++) {
2450       owner = row_ownership[index_row];
2451       index_col = global_indices[j];
2452       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2453         my_dnz[i] += 1.0;
2454       } else { /* offdiag block */
2455         my_onz[i] += 1.0;
2456       }
2457       /* same as before, interchanging rows and cols */
2458       if (i != j) {
2459         owner = row_ownership[index_col];
2460         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2461           my_dnz[j] += 1.0;
2462         } else {
2463           my_onz[j] += 1.0;
2464         }
2465       }
2466     }
2467   }
2468   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2469   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2470   if (local_rows) { /* multilevel guard */
2471     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2472     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2473   }
2474   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2475   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2476   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2477   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2478   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2479   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2480   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2481 
2482   /* set computed preallocation in dnz and onz */
2483   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2484   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2485   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2486   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2487   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2488   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2489   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2490   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2491 
2492   /* Resize preallocation if overestimated */
2493   for (i=0;i<lrows;i++) {
2494     dnz[i] = PetscMin(dnz[i],lcols);
2495     onz[i] = PetscMin(onz[i],cols-lcols);
2496   }
2497   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2498   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2499 
2500   /*
2501     Set values. Very Basic.
2502   */
2503   for (i=0;i<local_rows;i++) {
2504     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2505     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2506     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2507     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2508     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2509   }
2510   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2511   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2512   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2513 
2514   /* get back mat */
2515   *M = new_mat;
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatISSubassemble_Private"
2521 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2522 {
2523   Mat             subdomain_adj;
2524   IS              new_ranks,ranks_send_to;
2525   MatPartitioning partitioner;
2526   Mat_IS          *matis;
2527   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2528   PetscInt        prank;
2529   PetscMPIInt     size,rank,color;
2530   PetscInt        *xadj,*adjncy,*oldranks;
2531   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2532   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2533   PetscErrorCode  ierr;
2534   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2535   PetscSubcomm    subcomm;
2536 
2537   PetscFunctionBegin;
2538   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2539   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2540   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2541 
2542   /* Get info on mapping */
2543   matis = (Mat_IS*)(mat->data);
2544   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2545   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2546 
2547   /* build local CSR graph of subdomains' connectivity */
2548   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2549   xadj[0] = 0;
2550   xadj[1] = PetscMax(n_neighs-1,0);
2551   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2552   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2553 
2554   if (threshold) {
2555     PetscInt* count,min_threshold;
2556     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2557     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2558     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2559       for (j=0;j<n_shared[i];j++) {
2560         count[shared[i][j]] += 1;
2561       }
2562     }
2563     /* adapt threshold since we dont want to lose significant connections */
2564     min_threshold = n_neighs;
2565     for (i=1;i<n_neighs;i++) {
2566       for (j=0;j<n_shared[i];j++) {
2567         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2568       }
2569     }
2570     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2571     threshold = PetscMax(min_threshold+1,threshold);
2572     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2573     xadj[1] = 0;
2574     for (i=1;i<n_neighs;i++) {
2575       for (j=0;j<n_shared[i];j++) {
2576         if (count[shared[i][j]] < threshold) {
2577           adjncy[xadj[1]] = neighs[i];
2578           adjncy_wgt[xadj[1]] = n_shared[i];
2579           xadj[1]++;
2580           break;
2581         }
2582       }
2583     }
2584     ierr = PetscFree(count);CHKERRQ(ierr);
2585   } else {
2586     if (xadj[1]) {
2587       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2588       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2589     }
2590   }
2591   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2592   if (use_square) {
2593     for (i=0;i<xadj[1];i++) {
2594       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2595     }
2596   }
2597   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2598 
2599   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2600 
2601   /*
2602     Restrict work on active processes only.
2603   */
2604   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2605   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2606   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2607   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2608   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2609   if (color) {
2610     ierr = PetscFree(xadj);CHKERRQ(ierr);
2611     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2612     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2613   } else {
2614     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2615     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2616     prank = rank;
2617     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2618     for (i=0;i<size;i++) {
2619       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2620     }
2621     for (i=0;i<xadj[1];i++) {
2622       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2623     }
2624     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2625     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2626     n_subdomains = (PetscInt)size;
2627     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2628 
2629     /* Partition */
2630     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2631     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2632     if (use_vwgt) {
2633       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2634       v_wgt[0] = local_size;
2635       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2636     }
2637     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2638     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2639     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2640     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2641     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2642 
2643     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2644     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2645     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2646     /* clean up */
2647     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2648     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2649     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2650     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2651   }
2652   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2653 
2654   /* assemble parallel IS for sends */
2655   i = 1;
2656   if (color) i=0;
2657   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2658   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2659 
2660   /* get back IS */
2661   *is_sends = ranks_send_to;
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatISSubassemble"
2669 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2670 {
2671   Mat                    local_mat,new_mat;
2672   Mat_IS                 *matis;
2673   IS                     is_sends_internal;
2674   PetscInt               rows,cols;
2675   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2676   PetscBool              ismatis,isdense;
2677   ISLocalToGlobalMapping l2gmap;
2678   PetscInt*              l2gmap_indices;
2679   const PetscInt*        is_indices;
2680   MatType                new_local_type;
2681   MatTypePrivate         new_local_type_private;
2682   /* buffers */
2683   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2684   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2685   /* MPI */
2686   MPI_Comm               comm;
2687   PetscMPIInt            n_sends,n_recvs,commsize;
2688   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2689   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2690   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2691   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2692   PetscErrorCode         ierr;
2693 
2694   PetscFunctionBegin;
2695   /* checks */
2696   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2697   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2698   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2699   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2700   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2701   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2702   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2703   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2704   PetscValidLogicalCollectiveInt(mat,bs,0);
2705   /* prepare IS for sending if not provided */
2706   if (!is_sends) {
2707     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2708     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2709   } else {
2710     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2711     is_sends_internal = is_sends;
2712   }
2713 
2714   /* get pointer of MATIS data */
2715   matis = (Mat_IS*)mat->data;
2716 
2717   /* get comm */
2718   comm = PetscObjectComm((PetscObject)mat);
2719 
2720   /* compute number of sends */
2721   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2722   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2723 
2724   /* compute number of receives */
2725   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2726   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2727   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2728   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2729   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2730   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2731   ierr = PetscFree(iflags);CHKERRQ(ierr);
2732 
2733   /* prepare send/receive buffers */
2734   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2735   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2736   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2737   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2738 
2739   /* Get data from local mat */
2740   if (!isdense) {
2741     /* TODO: See below some guidelines on how to prepare the local buffers */
2742     /*
2743        send_buffer_vals should contain the raw values of the local matrix
2744        send_buffer_idxs should contain:
2745        - MatType_PRIVATE type
2746        - PetscInt        size_of_l2gmap
2747        - PetscInt        global_row_indices[size_of_l2gmap]
2748        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2749     */
2750   } else {
2751     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2752     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2753     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2754     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2755     send_buffer_idxs[1] = i;
2756     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2757     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2758     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2759     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2760     for (i=0;i<n_sends;i++) {
2761       ilengths_vals[is_indices[i]] = len*len;
2762       ilengths_idxs[is_indices[i]] = len+2;
2763     }
2764   }
2765   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2766   buf_size_idxs = 0;
2767   buf_size_vals = 0;
2768   for (i=0;i<n_recvs;i++) {
2769     buf_size_idxs += (PetscInt)olengths_idxs[i];
2770     buf_size_vals += (PetscInt)olengths_vals[i];
2771   }
2772   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2773   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2774 
2775   /* get new tags for clean communications */
2776   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2777   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2778 
2779   /* allocate for requests */
2780   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2781   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2782   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2783   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2784 
2785   /* communications */
2786   ptr_idxs = recv_buffer_idxs;
2787   ptr_vals = recv_buffer_vals;
2788   for (i=0;i<n_recvs;i++) {
2789     source_dest = onodes[i];
2790     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2791     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2792     ptr_idxs += olengths_idxs[i];
2793     ptr_vals += olengths_vals[i];
2794   }
2795   for (i=0;i<n_sends;i++) {
2796     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2797     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2798     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2799   }
2800   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2801   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2802 
2803   /* assemble new l2g map */
2804   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2805   ptr_idxs = recv_buffer_idxs;
2806   buf_size_idxs = 0;
2807   for (i=0;i<n_recvs;i++) {
2808     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2809     ptr_idxs += olengths_idxs[i];
2810   }
2811   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2812   ptr_idxs = recv_buffer_idxs;
2813   buf_size_idxs = 0;
2814   for (i=0;i<n_recvs;i++) {
2815     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2816     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2817     ptr_idxs += olengths_idxs[i];
2818   }
2819   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2820   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2821   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2822 
2823   /* infer new local matrix type from received local matrices type */
2824   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2825   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2826   new_local_type_private = MATAIJ_PRIVATE;
2827   new_local_type = MATSEQAIJ;
2828   if (n_recvs) {
2829     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2830     ptr_idxs = recv_buffer_idxs;
2831     for (i=0;i<n_recvs;i++) {
2832       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2833         new_local_type_private = MATAIJ_PRIVATE;
2834         break;
2835       }
2836       ptr_idxs += olengths_idxs[i];
2837     }
2838     switch (new_local_type_private) {
2839       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2840         new_local_type = MATSEQAIJ;
2841         bs = 1;
2842         break;
2843       case MATAIJ_PRIVATE:
2844         new_local_type = MATSEQAIJ;
2845         bs = 1;
2846         break;
2847       case MATBAIJ_PRIVATE:
2848         new_local_type = MATSEQBAIJ;
2849         break;
2850       case MATSBAIJ_PRIVATE:
2851         new_local_type = MATSEQSBAIJ;
2852         break;
2853       default:
2854         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2855         break;
2856     }
2857   }
2858 
2859   /* create MATIS object */
2860   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2861   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2862   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2863   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2864   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2865   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2866 
2867   /* set values */
2868   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2869   ptr_vals = recv_buffer_vals;
2870   ptr_idxs = recv_buffer_idxs;
2871   for (i=0;i<n_recvs;i++) {
2872     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2873       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2874       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2875       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2876       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2877       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2878     }
2879     ptr_idxs += olengths_idxs[i];
2880     ptr_vals += olengths_vals[i];
2881   }
2882   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2884   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2885   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2886 
2887   { /* check */
2888     Vec       lvec,rvec;
2889     PetscReal infty_error;
2890 
2891     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2892     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2893     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2894     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2895     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2896     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2897     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2898     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2899     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2900   }
2901 
2902   /* free workspace */
2903   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2904   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2905   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2906   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2907   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2908   if (isdense) {
2909     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2910     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2911   } else {
2912     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2913   }
2914   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2915   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2916   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2917   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2918   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2919   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2920   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2921   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2922   ierr = PetscFree(onodes);CHKERRQ(ierr);
2923   /* get back new mat */
2924   *mat_n = new_mat;
2925   PetscFunctionReturn(0);
2926 }
2927 
2928 #undef __FUNCT__
2929 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2930 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2931 {
2932   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2933   PC_IS                  *pcis = (PC_IS*)pc->data;
2934   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2935   MatNullSpace           CoarseNullSpace=NULL;
2936   ISLocalToGlobalMapping coarse_islg;
2937   IS                     coarse_is;
2938   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2939   PetscInt               im_active=-1,active_procs=-1;
2940   PC                     pc_temp;
2941   PCType                 coarse_pc_type;
2942   KSPType                coarse_ksp_type;
2943   PetscBool              multilevel_requested,multilevel_allowed;
2944   PetscBool              setsym,issym,isbddc,isnn;
2945   MatStructure           matstruct;
2946   PetscErrorCode         ierr;
2947 
2948   PetscFunctionBegin;
2949   /* Assign global numbering to coarse dofs */
2950   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2951 
2952   /* infer some info from user */
2953   issym = PETSC_FALSE;
2954   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2955   multilevel_allowed = PETSC_FALSE;
2956   multilevel_requested = PETSC_FALSE;
2957   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2958   if (multilevel_requested) {
2959     /* count "active processes" */
2960     im_active = !!(pcis->n);
2961     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2962     if (active_procs/pcbddc->coarsening_ratio < 2) {
2963       multilevel_allowed = PETSC_FALSE;
2964     } else {
2965       multilevel_allowed = PETSC_TRUE;
2966     }
2967   }
2968 
2969   /* set defaults for coarse KSP and PC */
2970   if (multilevel_allowed) {
2971     if (issym) {
2972       coarse_ksp_type = KSPRICHARDSON;
2973     } else {
2974       coarse_ksp_type = KSPCHEBYSHEV;
2975     }
2976     coarse_pc_type = PCBDDC;
2977   } else {
2978     coarse_ksp_type = KSPPREONLY;
2979     coarse_pc_type = PCREDUNDANT;
2980   }
2981 
2982   /* create the coarse KSP object only once with defaults */
2983   if (!pcbddc->coarse_ksp) {
2984     char prefix[256],str_level[3];
2985     size_t len;
2986     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2987     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2988     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2989     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2990     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2991     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2992     /* prefix */
2993     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2994     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2995     if (!pcbddc->current_level) {
2996       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2997       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2998     } else {
2999       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3000       if (pcbddc->current_level>1) len -= 2;
3001       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3002       *(prefix+len)='\0';
3003       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3004       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3005     }
3006     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3007   }
3008   /* allow user customization */
3009   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3010   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3011   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3012 
3013   /* get some info after set from options */
3014   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3015   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3016   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3017   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3018     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3019     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3020     isbddc = PETSC_FALSE;
3021   }
3022 
3023   /* propagate BDDC info to the next level */
3024   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3025   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3026   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3027 
3028   /* creates temporary MATIS object for coarse matrix */
3029   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3030   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3031   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3032   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3033   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3034   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3035   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3036   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3037   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3038   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3039 
3040   /* assemble coarse matrix */
3041   if (isbddc || isnn) {
3042     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3043   } else {
3044     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3045   }
3046   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3047 
3048   /* create local to global scatters for coarse problem */
3049   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3050   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3051   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3052 
3053   /* propagate symmetry info to coarse matrix */
3054   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3055 
3056   /* Compute coarse null space (special handling by BDDC only) */
3057   if (pcbddc->NullSpace) {
3058     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3059     if (isbddc) {
3060       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3061     } else {
3062       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3063     }
3064   }
3065 
3066   /* set operators */
3067   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3068   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3069 
3070   /* additional KSP customization */
3071   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3072   if (max_it < 5) {
3073     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3074   }
3075   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3076 
3077 
3078   /* print some info if requested */
3079   if (pcbddc->dbg_flag) {
3080     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3081     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3082     if (!multilevel_allowed) {
3083       if (multilevel_requested) {
3084         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3085       } else if (pcbddc->max_levels) {
3086         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3087       }
3088     }
3089     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3090     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3091   }
3092 
3093   /* setup coarse ksp */
3094   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3095   if (pcbddc->dbg_flag) {
3096     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3097     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3098     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3099     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3100   }
3101 
3102   /* Check coarse problem if requested */
3103   if (pcbddc->dbg_flag) {
3104     KSP       check_ksp;
3105     KSPType   check_ksp_type;
3106     PC        check_pc;
3107     Vec       check_vec;
3108     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3109     PetscInt  its;
3110     PetscBool ispreonly,compute;
3111 
3112     /* Create ksp object suitable for estimation of extreme eigenvalues */
3113     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3114     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3115     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3116     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3117     if (ispreonly) {
3118       check_ksp_type = KSPPREONLY;
3119       compute = PETSC_FALSE;
3120     } else {
3121       if (issym) check_ksp_type = KSPCG;
3122       else check_ksp_type = KSPGMRES;
3123       compute = PETSC_TRUE;
3124     }
3125     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3126     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3127     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3128     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3129     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3130     /* create random vec */
3131     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3132     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3133     if (CoarseNullSpace) {
3134       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3135     }
3136     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3137     /* solve coarse problem */
3138     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3139     if (CoarseNullSpace) {
3140       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3141     }
3142     /* check coarse problem residual error */
3143     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3144     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3145     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3146     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3147     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3148     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3149     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3150     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3151     /* get eigenvalue estimation if preonly has not been requested */
3152     if (!ispreonly) {
3153       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3154       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3155       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3156     }
3157     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3158     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3159   }
3160   /* free memory */
3161   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3162   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 #undef __FUNCT__
3167 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3168 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3169 {
3170   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3171   PC_IS*         pcis = (PC_IS*)pc->data;
3172   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3173   PetscInt       i,j,coarse_size;
3174   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3175   PetscErrorCode ierr;
3176 
3177   PetscFunctionBegin;
3178   /* get indices in local ordering for vertices and constraints */
3179   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3180   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3181   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3182   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3183   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3184   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3185   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3186 
3187   /* Compute global number of coarse dofs */
3188   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3189 
3190   /* check numbering */
3191   if (pcbddc->dbg_flag) {
3192     PetscScalar coarsesum,*array;
3193 
3194     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3195     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3196     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3197     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3198     for (i=0;i<pcbddc->local_primal_size;i++) {
3199       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3200     }
3201     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3202     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3203     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3204     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3205     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3206     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3207     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3208     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3209     for (i=0;i<pcis->n;i++) {
3210       if (array[i] == 1.0) {
3211         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3212       }
3213     }
3214     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3215     for (i=0;i<pcis->n;i++) {
3216       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3217     }
3218     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3219     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3220     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3221     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3222     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3223     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3224     if (pcbddc->dbg_flag > 1) {
3225       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3226       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3227       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3228       for (i=0;i<pcbddc->local_primal_size;i++) {
3229         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3230       }
3231       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3232     }
3233     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3234   }
3235   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3236   /* get back data */
3237   *coarse_size_n = coarse_size;
3238   *local_primal_indices_n = local_primal_indices;
3239   PetscFunctionReturn(0);
3240 }
3241 
3242