xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision e456f2a8cd77b259faa7f5cd97a941702f1a7f35)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   for (i=0;i<pcbddc->n_ISForDofs;i++) {
62     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
63   }
64   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
65   pcbddc->n_ISForDofs = 0;
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
79   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
80   PetscFunctionReturn(0);
81 }
82 
83 #undef __FUNCT__
84 #define __FUNCT__ "PCBDDCResetSolvers"
85 PetscErrorCode PCBDDCResetSolvers(PC pc)
86 {
87   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
92   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   PetscFunctionReturn(0);
109 }
110 
111 #undef __FUNCT__
112 #define __FUNCT__ "PCBDDCCreateWorkVectors"
113 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
114 {
115   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
116   PC_IS          *pcis = (PC_IS*)pc->data;
117   VecType        impVecType;
118   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
123   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
124   local_primal_size = n_constraints+n_vertices;
125   n_R = pcis->n-n_vertices;
126   /* local work vectors */
127   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
128   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
129   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
130   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
131   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
132   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
133   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
134   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
135   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
136   if (n_constraints) {
137     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
138     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
139     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 #undef __FUNCT__
145 #define __FUNCT__ "PCBDDCSetUpCorrection"
146 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
147 {
148   PetscErrorCode         ierr;
149   /* pointers to pcis and pcbddc */
150   PC_IS*                 pcis = (PC_IS*)pc->data;
151   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
152   /* submatrices of local problem */
153   Mat                    A_RV,A_VR,A_VV;
154   /* working matrices */
155   Mat                    M1,M2,M3,C_CR;
156   /* working vectors */
157   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
158   /* additional working stuff */
159   IS                     is_aux;
160   ISLocalToGlobalMapping BtoNmap;
161   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
162   const PetscScalar      *array,*row_cmat_values;
163   const PetscInt         *row_cmat_indices,*idx_R_local;
164   PetscInt               *vertices,*idx_V_B,*auxindices;
165   PetscInt               n_vertices,n_constraints,size_of_constraint;
166   PetscInt               i,j,n_R,n_D,n_B;
167   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
168   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
169   MatType                impMatType;
170   /* some shortcuts to scalars */
171   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
172   /* for debugging purposes */
173   PetscReal              *coarsefunctions_errors,*constraints_errors;
174 
175   PetscFunctionBegin;
176   /* get number of vertices and their local indices */
177   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
178   n_constraints = pcbddc->local_primal_size-n_vertices;
179   /* Set Non-overlapping dimensions */
180   n_B = pcis->n_B; n_D = pcis->n - n_B;
181   n_R = pcis->n-n_vertices;
182 
183   /* Set types for local objects needed by BDDC precondtioner */
184   impMatType = MATSEQDENSE;
185 
186   /* Allocating some extra storage just to be safe */
187   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
188   for (i=0;i<pcis->n;i++) auxindices[i]=i;
189 
190   /* vertices in boundary numbering */
191   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
193   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
194   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
195   if (i != n_vertices) {
196     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
197   }
198 
199   /* Precompute stuffs needed for preprocessing and application of BDDC*/
200   if (n_constraints) {
201     /* work vectors */
202     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
203     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
204     /* auxiliary matrices */
205     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
206     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
207     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
208     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
209 
210     /* Extract constraints on R nodes: C_{CR}  */
211     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
212     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
213     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
214 
215     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
216     for (i=0;i<n_constraints;i++) {
217       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
218       /* Get row of constraint matrix in R numbering */
219       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
221       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
222       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
223       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
224       /* Solve for row of constraint matrix in R numbering */
225       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
226       /* Set values in local_auxmat2 */
227       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
229       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
230     }
231     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
234 
235     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
236     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
237     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
238     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
239     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
240     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
241     ierr = MatSetUp(M1);CHKERRQ(ierr);
242     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
243     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
244     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
245     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
246     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
247     ierr = MatDestroy(&M2);CHKERRQ(ierr);
248     ierr = MatDestroy(&M3);CHKERRQ(ierr);
249     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
250     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
251   }
252 
253   /* Get submatrices from subdomain matrix */
254   if (n_vertices) {
255     PetscInt ibs,mbs;
256     PetscBool issbaij;
257     Mat newmat;
258 
259     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
260     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
261     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
262     if (ibs != mbs) { /* need to convert to SEQAIJ */
263       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
265       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
266       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
267       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
268     } else {
269       /* this is safe */
270       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
271       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
272       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
273         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
274         /* which of the two approaches is faster? */
275         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
277         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
278         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
279         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
280       } else {
281         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
282         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
283       }
284     }
285     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
286     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
287     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
288   }
289 
290   /* Matrix of coarse basis functions (local) */
291   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
292   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
293   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
294   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
295   if (pcbddc->switch_static || pcbddc->dbg_flag) {
296     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
297     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
298     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
299     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
300   }
301 
302   if (pcbddc->dbg_flag) {
303     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
304     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
305     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
306   }
307   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
308   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
309 
310   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
311 
312   /* vertices */
313   for (i=0;i<n_vertices;i++) {
314     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
315     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
316     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
317     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
318     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
319     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
320     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
321     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
322     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
323     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
324     if (n_constraints) {
325       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
326       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
327       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
328     }
329     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
330     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
331 
332     /* Set values in coarse basis function and subdomain part of coarse_mat */
333     /* coarse basis functions */
334     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
335     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
336     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
337     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
339     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
340     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
341     if (pcbddc->switch_static || pcbddc->dbg_flag) {
342       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
343       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
344       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
346       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
347     }
348     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
349     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
351     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
352     if (n_constraints) {
353       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
354       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
355       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
356     }
357 
358     /* check */
359     if (pcbddc->dbg_flag) {
360       /* assemble subdomain vector on local nodes */
361       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
362       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
365       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
366       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
367       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
368       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
369       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
370       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
372       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
373       if (n_constraints) {
374         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
375         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
376         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
377       }
378       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
380       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
381       /* check saddle point solution */
382       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
383       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
384       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
385       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
386       /* shift by the identity matrix */
387       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
388       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
389       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
390       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
391     }
392   }
393 
394   /* constraints */
395   for (i=0;i<n_constraints;i++) {
396     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
397     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
398     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
399     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
400     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
401     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
402     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
403     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
404     if (n_vertices) {
405       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
406     }
407     /* Set values in coarse basis function and subdomain part of coarse_mat */
408     /* coarse basis functions */
409     j = i+n_vertices; /* don't touch this! */
410     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
411     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
412     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
413     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
415     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
416     if (pcbddc->switch_static || pcbddc->dbg_flag) {
417       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
418       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
419       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
421       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
422     }
423     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
424     if (n_vertices) {
425       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
426       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
428     }
429     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
430     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
431     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
432 
433     if (pcbddc->dbg_flag) {
434       /* assemble subdomain vector on nodes */
435       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
436       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
438       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
439       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
440       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
441       /* assemble subdomain vector of lagrange multipliers */
442       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
443       if (n_vertices) {
444         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
445         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
446         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
447       }
448       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
450       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
451       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
453       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
454       /* check saddle point solution */
455       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
456       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
457       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
458       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
459       /* shift by the identity matrix */
460       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
461       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
462       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
463       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
464     }
465   }
466   /* call assembling routines for local coarse basis */
467   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469   if (pcbddc->switch_static || pcbddc->dbg_flag) {
470     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
471     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
472   }
473 
474   /* compute other basis functions for non-symmetric problems */
475   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
476   if (!setsym || (setsym && !issym)) {
477     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
478     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
479     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
480     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
481     if (pcbddc->switch_static || pcbddc->dbg_flag) {
482       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
483       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
484       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
485       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
486     }
487     for (i=0;i<pcbddc->local_primal_size;i++) {
488       if (n_constraints) {
489         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
490         for (j=0;j<n_constraints;j++) {
491           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
492         }
493         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
494         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
495       }
496       if (i<n_vertices) {
497         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
498         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
499         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
500         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
501         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
502         if (n_constraints) {
503           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
504         }
505       } else {
506         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
507       }
508       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
509       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
510       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
511       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
512       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
514       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
515       if (i<n_vertices) {
516         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
517       }
518       if (pcbddc->switch_static || pcbddc->dbg_flag) {
519         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
520         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
521         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
523         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
524       }
525 
526       if (pcbddc->dbg_flag) {
527         /* assemble subdomain vector on nodes */
528         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
529         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
531         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
532         if (i<n_vertices) {
533           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
534         }
535         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
536         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
537         /* assemble subdomain vector of lagrange multipliers */
538         for (j=0;j<pcbddc->local_primal_size;j++) {
539           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
540         }
541         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
542         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
543         /* check saddle point solution */
544         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
545         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
546         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
547         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
548         /* shift by the identity matrix */
549         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
550         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
551         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
552         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
553       }
554     }
555     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
556     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557     if (pcbddc->switch_static || pcbddc->dbg_flag) {
558       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     }
561   }
562   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
563   /* Checking coarse_sub_mat and coarse basis functios */
564   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
565   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
566   if (pcbddc->dbg_flag) {
567     Mat         coarse_sub_mat;
568     Mat         AUXMAT,TM1,TM2,TM3,TM4;
569     Mat         coarse_phi_D,coarse_phi_B;
570     Mat         coarse_psi_D,coarse_psi_B;
571     Mat         A_II,A_BB,A_IB,A_BI;
572     MatType     checkmattype=MATSEQAIJ;
573     PetscReal   real_value;
574 
575     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
576     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
577     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
578     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
579     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
580     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
581     if (pcbddc->coarse_psi_B) {
582       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
583       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
584     }
585     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
586 
587     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
588     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
589     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
590     if (pcbddc->coarse_psi_B) {
591       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
592       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
593       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
594       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
595       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
596       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
597       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
598       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
599       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
600       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
601       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
602       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
603     } else {
604       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
605       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
606       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
607       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
608       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
609       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
610       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
611       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
612     }
613     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
614     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
615     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
616     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
617     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
618     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
619     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
620     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
621     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
622     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
623     for (i=0;i<pcbddc->local_primal_size;i++) {
624       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
625     }
626     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
627     for (i=0;i<pcbddc->local_primal_size;i++) {
628       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
629     }
630     if (pcbddc->coarse_psi_B) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
632       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
633         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
634       }
635       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
636       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
637         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
638       }
639     }
640     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
641     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
643     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
644     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
645     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
646     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
647     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
648     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
649     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
650     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
651     if (pcbddc->coarse_psi_B) {
652       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
653       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
654     }
655     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
656     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
657     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
658     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
659   }
660   /* free memory */
661   if (n_vertices) {
662     ierr = PetscFree(vertices);CHKERRQ(ierr);
663     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
664     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
665     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
666     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
667     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
668   }
669   if (n_constraints) {
670     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
671     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
674   }
675   ierr = PetscFree(auxindices);CHKERRQ(ierr);
676   /* get back data */
677   *coarse_submat_vals_n = coarse_submat_vals;
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
683 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
684 {
685   PC_IS*            pcis = (PC_IS*)(pc->data);
686   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
687   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
688   PetscBool         issbaij,isbaij;
689   /* manage repeated solves */
690   MatReuse          reuse;
691   MatStructure      matstruct;
692   PetscErrorCode    ierr;
693 
694   PetscFunctionBegin;
695   /* get mat flags */
696   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
697   reuse = MAT_INITIAL_MATRIX;
698   if (pc->setupcalled) {
699     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
700     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
701     if (matstruct == SAME_NONZERO_PATTERN) {
702       reuse = MAT_REUSE_MATRIX;
703     } else {
704       reuse = MAT_INITIAL_MATRIX;
705     }
706   }
707   if (reuse == MAT_INITIAL_MATRIX) {
708     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
709     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
710     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
711     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
712     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
713   }
714 
715   /* transform local matrices if needed */
716   if (pcbddc->use_change_of_basis) {
717     Mat         change_mat_all;
718     PetscScalar *row_cmat_values;
719     PetscInt    *row_cmat_indices;
720     PetscInt    *nnz,*is_indices,*temp_indices;
721     PetscInt    i,j,k,n_D,n_B;
722 
723     /* Get Non-overlapping dimensions */
724     n_B = pcis->n_B;
725     n_D = pcis->n-n_B;
726 
727     /* compute nonzero structure of change of basis on all local nodes */
728     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
729     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
730     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
731     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
732     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
733     k=1;
734     for (i=0;i<n_B;i++) {
735       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
736       nnz[is_indices[i]]=j;
737       if (k < j) k = j;
738       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
739     }
740     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     /* assemble change of basis matrix on the whole set of local dofs */
742     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
743     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
744     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
745     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
746     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
747     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     for (i=0;i<n_D;i++) {
749       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
750     }
751     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
752     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
753     for (i=0;i<n_B;i++) {
754       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
755       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
756       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
757       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
758     }
759     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
760     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
761     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
762     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
763     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
764     if (!issbaij && !isbaij) {
765       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
766     } else {
767       Mat work_mat;
768       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
769       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
770       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
771     }
772     /*
773     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
774     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
775     */
776     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
777     ierr = PetscFree(nnz);CHKERRQ(ierr);
778     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
779   } else {
780     /* without change of basis, the local matrix is unchanged */
781     if (!pcbddc->local_mat) {
782       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
783       pcbddc->local_mat = matis->A;
784     }
785   }
786 
787   /* get submatrices */
788   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
789   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
790   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
791   if (!issbaij) {
792     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
793     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
794   } else {
795     Mat newmat;
796     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
797     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
798     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
799     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
806 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
807 {
808   PC_IS*         pcis = (PC_IS*)(pc->data);
809   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
810   IS             is_aux1,is_aux2;
811   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
812   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
813   PetscInt       vbs,bs;
814   PetscBT        bitmask;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   /* Set Non-overlapping dimensions */
819   n_B = pcis->n_B; n_D = pcis->n - n_B;
820   /* get vertex indices from constraint matrix */
821   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
822 
823   /* create auxiliary bitmask */
824   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
825   for (i=0;i<n_vertices;i++) {
826     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
827   }
828 
829   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
830   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
831   for (i=0, n_R=0; i<pcis->n; i++) {
832     if (!PetscBTLookup(bitmask,i)) {
833       idx_R_local[n_R] = i;
834       n_R++;
835     }
836   }
837 
838   /* Block code */
839   vbs = 1;
840   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
841   if (bs>1 && !(n_vertices%bs)) {
842     PetscBool is_blocked = PETSC_TRUE;
843     PetscInt  *vary;
844     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
845     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
846     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
847     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
848     for (i=0; i<n_vertices; i++) {
849       if (vary[i]!=0 && vary[i]!=bs) {
850         is_blocked = PETSC_FALSE;
851         break;
852       }
853     }
854     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
855       vbs = bs;
856       for (i=0;i<n_R/vbs;i++) {
857         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
858       }
859     }
860     ierr = PetscFree(vary);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(vertices);CHKERRQ(ierr);
863   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
864   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
865 
866   /* print some info if requested */
867   if (pcbddc->dbg_flag) {
868     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
869     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
870     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
871     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
872     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
873     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
874     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
875   }
876 
877   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
878   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
879   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
880   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
881   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
882   for (i=0; i<n_D; i++) {
883     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
884   }
885   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
886   for (i=0, j=0; i<n_R; i++) {
887     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
888       aux_array1[j++] = i;
889     }
890   }
891   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
892   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
893   for (i=0, j=0; i<n_B; i++) {
894     if (!PetscBTLookup(bitmask,is_indices[i])) {
895       aux_array2[j++] = i;
896     }
897   }
898   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
899   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
900   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
901   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
902   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
903 
904   if (pcbddc->switch_static || pcbddc->dbg_flag) {
905     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
906     for (i=0, j=0; i<n_R; i++) {
907       if (PetscBTLookup(bitmask,idx_R_local[i])) {
908         aux_array1[j++] = i;
909       }
910     }
911     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
912     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
913     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
914   }
915   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
916   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
923 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
924 {
925   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
926   PC_IS          *pcis = (PC_IS*)pc->data;
927   PC             pc_temp;
928   Mat            A_RR;
929   Vec            vec1,vec2,vec3;
930   MatStructure   matstruct;
931   PetscScalar    m_one = -1.0;
932   PetscReal      value;
933   PetscInt       n_D,n_R,ibs,mbs;
934   PetscBool      use_exact,use_exact_reduced,issbaij;
935   PetscErrorCode ierr;
936   /* prefixes stuff */
937   char           dir_prefix[256],neu_prefix[256],str_level[3];
938   size_t         len;
939 
940   PetscFunctionBegin;
941   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
942 
943   /* compute prefixes */
944   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
945   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
946   if (!pcbddc->current_level) {
947     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
948     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
949     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
950     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
951   } else {
952     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
953     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
954     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
955     len -= 15; /* remove "pc_bddc_coarse_" */
956     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
957     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
958     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
959     *(dir_prefix+len)='\0';
960     *(neu_prefix+len)='\0';
961     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
962     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
963     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
964     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
965   }
966 
967   /* DIRICHLET PROBLEM */
968   /* Matrix for Dirichlet problem is pcis->A_II */
969   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
970   if (!pcbddc->ksp_D) { /* create object if not yet build */
971     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
972     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
973     /* default */
974     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
975     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
976     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
977     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
978     if (issbaij) {
979       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
980     } else {
981       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
982     }
983     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
984   }
985   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
986   /* Allow user's customization */
987   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
988   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
989   if (!n_D) {
990     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
991     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
992   }
993   /* Set Up KSP for Dirichlet problem of BDDC */
994   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
995   /* set ksp_D into pcis data */
996   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
997   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
998   pcis->ksp_D = pcbddc->ksp_D;
999 
1000   /* NEUMANN PROBLEM */
1001   /* Matrix for Neumann problem is A_RR -> we need to create it */
1002   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1003   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1004   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1005   if (ibs != mbs) {
1006     Mat newmat;
1007     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1008     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1009     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1010   } else {
1011     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1012   }
1013   if (!pcbddc->ksp_R) { /* create object if not yet build */
1014     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1015     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1016     /* default */
1017     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1018     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1019     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1020     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1021     if (issbaij) {
1022       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1023     } else {
1024       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1025     }
1026     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1027   }
1028   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1029   /* Allow user's customization */
1030   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1031   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1032   if (!n_R) {
1033     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1034     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1035   }
1036   /* Set Up KSP for Neumann problem of BDDC */
1037   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1038 
1039   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1040 
1041   /* Dirichlet */
1042   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1043   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1044   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1045   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1046   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1047   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1048   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1049   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1050   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1051   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1052   /* need to be adapted? */
1053   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1054   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1055   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1056   /* print info */
1057   if (pcbddc->dbg_flag) {
1058     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1059     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1060     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1061     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1062     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1063   }
1064   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1065     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1066   }
1067 
1068   /* Neumann */
1069   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1070   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1071   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1072   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1073   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1074   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1075   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1076   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1077   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1078   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1079   /* need to be adapted? */
1080   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1081   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1082   /* print info */
1083   if (pcbddc->dbg_flag) {
1084     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1085     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1086   }
1087   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1088     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1089   }
1090   /* free Neumann problem's matrix */
1091   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1097 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1098 {
1099   PetscErrorCode ierr;
1100   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1101 
1102   PetscFunctionBegin;
1103   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1104   if (pcbddc->local_auxmat1) {
1105     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1106     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1107   }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1113 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1114 {
1115   PetscErrorCode ierr;
1116   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1117   PC_IS*            pcis = (PC_IS*)  (pc->data);
1118   const PetscScalar zero = 0.0;
1119 
1120   PetscFunctionBegin;
1121   /* Application of PHI^T (or PSI^T)  */
1122   if (pcbddc->coarse_psi_B) {
1123     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1124     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1125   } else {
1126     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1127     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1128   }
1129   /* Scatter data of coarse_rhs */
1130   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1131   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1132 
1133   /* Local solution on R nodes */
1134   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1135   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137   if (pcbddc->switch_static) {
1138     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1140   }
1141   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1142   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1143   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145   if (pcbddc->switch_static) {
1146     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1147     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148   }
1149 
1150   /* Coarse solution */
1151   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1152   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1153     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1154   }
1155   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157 
1158   /* Sum contributions from two levels */
1159   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1160   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1166 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1167 {
1168   PetscErrorCode ierr;
1169   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1170 
1171   PetscFunctionBegin;
1172   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1178 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1179 {
1180   PetscErrorCode ierr;
1181   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1182 
1183   PetscFunctionBegin;
1184   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 /* uncomment for testing purposes */
1189 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1190 #undef __FUNCT__
1191 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1192 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1193 {
1194   PetscErrorCode    ierr;
1195   PC_IS*            pcis = (PC_IS*)(pc->data);
1196   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1197   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1198   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1199   MatType           impMatType=MATSEQAIJ;
1200   /* one and zero */
1201   PetscScalar       one=1.0,zero=0.0;
1202   /* space to store constraints and their local indices */
1203   PetscScalar       *temp_quadrature_constraint;
1204   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1205   /* iterators */
1206   PetscInt          i,j,k,total_counts,temp_start_ptr;
1207   /* stuff to store connected components stored in pcbddc->mat_graph */
1208   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1209   PetscInt          n_ISForFaces,n_ISForEdges;
1210   /* near null space stuff */
1211   MatNullSpace      nearnullsp;
1212   const Vec         *nearnullvecs;
1213   Vec               *localnearnullsp;
1214   PetscBool         nnsp_has_cnst;
1215   PetscInt          nnsp_size;
1216   PetscScalar       *array;
1217   /* BLAS integers */
1218   PetscBLASInt      lwork,lierr;
1219   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1220   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1221   /* LAPACK working arrays for SVD or POD */
1222   PetscBool         skip_lapack;
1223   PetscScalar       *work;
1224   PetscReal         *singular_vals;
1225 #if defined(PETSC_USE_COMPLEX)
1226   PetscReal         *rwork;
1227 #endif
1228 #if defined(PETSC_MISSING_LAPACK_GESVD)
1229   PetscBLASInt      Blas_one_2=1;
1230   PetscScalar       *temp_basis,*correlation_mat;
1231 #else
1232   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1233   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1234 #endif
1235   /* change of basis */
1236   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1237   PetscBool         boolforchange,qr_needed;
1238   PetscBT           touched,change_basis,qr_needed_idx;
1239   /* auxiliary stuff */
1240   PetscInt          *nnz,*is_indices,*local_to_B;
1241   /* some quantities */
1242   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1243   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1244 
1245 
1246   PetscFunctionBegin;
1247   /* Get index sets for faces, edges and vertices from graph */
1248   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1249     pcbddc->use_vertices = PETSC_TRUE;
1250   }
1251   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1252   /* HACK: provide functions to set change of basis */
1253   if (!ISForVertices && pcbddc->NullSpace) {
1254     pcbddc->use_change_of_basis = PETSC_TRUE;
1255     pcbddc->use_change_on_faces = PETSC_FALSE;
1256   }
1257   /* print some info */
1258   if (pcbddc->dbg_flag) {
1259     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1260     i = 0;
1261     if (ISForVertices) {
1262       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1266     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1267     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1268   }
1269   /* check if near null space is attached to global mat */
1270   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1271   if (nearnullsp) {
1272     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1273   } else { /* if near null space is not provided BDDC uses constants by default */
1274     nnsp_size = 0;
1275     nnsp_has_cnst = PETSC_TRUE;
1276   }
1277   /* get max number of constraints on a single cc */
1278   max_constraints = nnsp_size;
1279   if (nnsp_has_cnst) max_constraints++;
1280 
1281   /*
1282        Evaluate maximum storage size needed by the procedure
1283        - temp_indices will contain start index of each constraint stored as follows
1284        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1285        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1286        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1287                                                                                                                                                          */
1288   total_counts = n_ISForFaces+n_ISForEdges;
1289   total_counts *= max_constraints;
1290   n_vertices = 0;
1291   if (ISForVertices) {
1292     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1293   }
1294   total_counts += n_vertices;
1295   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1296   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1297   total_counts = 0;
1298   max_size_of_constraint = 0;
1299   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1300     if (i<n_ISForEdges) {
1301       used_IS = &ISForEdges[i];
1302     } else {
1303       used_IS = &ISForFaces[i-n_ISForEdges];
1304     }
1305     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1306     total_counts += j;
1307     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1308   }
1309   total_counts *= max_constraints;
1310   total_counts += n_vertices;
1311   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1312   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1313   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1314   /* local to boundary numbering */
1315   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1316   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1317   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1318   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1319   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1320   /* get local part of global near null space vectors */
1321   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1322   for (k=0;k<nnsp_size;k++) {
1323     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1324     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1326   }
1327 
1328   /* whether or not to skip lapack calls */
1329   skip_lapack = PETSC_TRUE;
1330   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1331 
1332   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1333   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1334     PetscScalar temp_work;
1335 #if defined(PETSC_MISSING_LAPACK_GESVD)
1336     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1337     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1338     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1339     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1340 #if defined(PETSC_USE_COMPLEX)
1341     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1342 #endif
1343     /* now we evaluate the optimal workspace using query with lwork=-1 */
1344     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1345     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1346     lwork = -1;
1347     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1348 #if !defined(PETSC_USE_COMPLEX)
1349     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1350 #else
1351     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1352 #endif
1353     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1354     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1355 #else /* on missing GESVD */
1356     /* SVD */
1357     PetscInt max_n,min_n;
1358     max_n = max_size_of_constraint;
1359     min_n = max_constraints;
1360     if (max_size_of_constraint < max_constraints) {
1361       min_n = max_size_of_constraint;
1362       max_n = max_constraints;
1363     }
1364     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1365 #if defined(PETSC_USE_COMPLEX)
1366     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1367 #endif
1368     /* now we evaluate the optimal workspace using query with lwork=-1 */
1369     lwork = -1;
1370     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1371     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1372     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1373     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1374 #if !defined(PETSC_USE_COMPLEX)
1375     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1376 #else
1377     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1378 #endif
1379     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1380     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1381 #endif /* on missing GESVD */
1382     /* Allocate optimal workspace */
1383     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1384     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1385   }
1386   /* Now we can loop on constraining sets */
1387   total_counts = 0;
1388   temp_indices[0] = 0;
1389   /* vertices */
1390   if (ISForVertices) {
1391     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1392     if (nnsp_has_cnst) { /* consider all vertices */
1393       for (i=0;i<n_vertices;i++) {
1394         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1395         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1396         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1397         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1398         total_counts++;
1399       }
1400     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1401       PetscBool used_vertex;
1402       for (i=0;i<n_vertices;i++) {
1403         used_vertex = PETSC_FALSE;
1404         k = 0;
1405         while (!used_vertex && k<nnsp_size) {
1406           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1407           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1408             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1409             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1410             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1411             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1412             total_counts++;
1413             used_vertex = PETSC_TRUE;
1414           }
1415           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1416           k++;
1417         }
1418       }
1419     }
1420     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1421     n_vertices = total_counts;
1422   }
1423 
1424   /* edges and faces */
1425   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1426     if (i<n_ISForEdges) {
1427       used_IS = &ISForEdges[i];
1428       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1429     } else {
1430       used_IS = &ISForFaces[i-n_ISForEdges];
1431       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1432     }
1433     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1434     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1435     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1436     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1437     /* change of basis should not be performed on local periodic nodes */
1438     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1439     if (nnsp_has_cnst) {
1440       PetscScalar quad_value;
1441       temp_constraints++;
1442       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1443       for (j=0;j<size_of_constraint;j++) {
1444         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1445         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1446         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1447       }
1448       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1449       total_counts++;
1450     }
1451     for (k=0;k<nnsp_size;k++) {
1452       PetscReal real_value;
1453       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1454       for (j=0;j<size_of_constraint;j++) {
1455         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1456         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1457         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1458       }
1459       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1460       /* check if array is null on the connected component */
1461       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1462       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1463       if (real_value > 0.0) { /* keep indices and values */
1464         temp_constraints++;
1465         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1466         total_counts++;
1467       }
1468     }
1469     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1470     valid_constraints = temp_constraints;
1471     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1472     if (!pcbddc->use_nnsp_true && temp_constraints) {
1473       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1474 
1475 #if defined(PETSC_MISSING_LAPACK_GESVD)
1476       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1477          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1478          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1479             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1480             from that computed using LAPACKgesvd
1481          -> This is due to a different computation of eigenvectors in LAPACKheev
1482          -> The quality of the POD-computed basis will be the same */
1483       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1484       /* Store upper triangular part of correlation matrix */
1485       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1486       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1487       for (j=0;j<temp_constraints;j++) {
1488         for (k=0;k<j+1;k++) {
1489           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1490         }
1491       }
1492       /* compute eigenvalues and eigenvectors of correlation matrix */
1493       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1494       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1495 #if !defined(PETSC_USE_COMPLEX)
1496       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1497 #else
1498       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1499 #endif
1500       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1501       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1502       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1503       j=0;
1504       while (j < temp_constraints && singular_vals[j] < tol) j++;
1505       total_counts=total_counts-j;
1506       valid_constraints = temp_constraints-j;
1507       /* scale and copy POD basis into used quadrature memory */
1508       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1509       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1510       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1511       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1512       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1513       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1514       if (j<temp_constraints) {
1515         PetscInt ii;
1516         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1517         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1518         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1519         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1520         for (k=0;k<temp_constraints-j;k++) {
1521           for (ii=0;ii<size_of_constraint;ii++) {
1522             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1523           }
1524         }
1525       }
1526 #else  /* on missing GESVD */
1527       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1528       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1529       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1530       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1531 #if !defined(PETSC_USE_COMPLEX)
1532       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1533 #else
1534       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1535 #endif
1536       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1537       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1538       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1539       k = temp_constraints;
1540       if (k > size_of_constraint) k = size_of_constraint;
1541       j = 0;
1542       while (j < k && singular_vals[k-j-1] < tol) j++;
1543       total_counts = total_counts-temp_constraints+k-j;
1544       valid_constraints = k-j;
1545 #endif /* on missing GESVD */
1546     }
1547     /* setting change_of_basis flag is safe now */
1548     if (boolforchange) {
1549       for (j=0;j<valid_constraints;j++) {
1550         PetscBTSet(change_basis,total_counts-j-1);
1551       }
1552     }
1553   }
1554   /* free index sets of faces, edges and vertices */
1555   for (i=0;i<n_ISForFaces;i++) {
1556     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1557   }
1558   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1559   for (i=0;i<n_ISForEdges;i++) {
1560     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1561   }
1562   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1563   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1564 
1565   /* free workspace */
1566   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1567     ierr = PetscFree(work);CHKERRQ(ierr);
1568 #if defined(PETSC_USE_COMPLEX)
1569     ierr = PetscFree(rwork);CHKERRQ(ierr);
1570 #endif
1571     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1572 #if defined(PETSC_MISSING_LAPACK_GESVD)
1573     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1574     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1575 #endif
1576   }
1577   for (k=0;k<nnsp_size;k++) {
1578     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1579   }
1580   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1581 
1582   /* set quantities in pcbddc data structure */
1583   /* n_vertices defines the number of subdomain corners in the primal space */
1584   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1585   pcbddc->local_primal_size = total_counts;
1586   pcbddc->n_vertices = n_vertices;
1587   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1588 
1589   /* Create constraint matrix */
1590   /* The constraint matrix is used to compute the l2g map of primal dofs */
1591   /* so we need to set it up properly either with or without change of basis */
1592   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1593   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1594   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1595   /* array to compute a local numbering of constraints : vertices first then constraints */
1596   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1597   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1598   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1599   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1600   /* auxiliary stuff for basis change */
1601   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1602   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1603 
1604   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1605   total_primal_vertices=0;
1606   for (i=0;i<pcbddc->local_primal_size;i++) {
1607     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1608     if (size_of_constraint == 1) {
1609       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1610       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1611       aux_primal_minloc[total_primal_vertices]=0;
1612       total_primal_vertices++;
1613     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1614       PetscInt min_loc,min_index;
1615       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1616       /* find first untouched local node */
1617       k = 0;
1618       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1619       min_index = global_indices[k];
1620       min_loc = k;
1621       /* search the minimum among global nodes already untouched on the cc */
1622       for (k=1;k<size_of_constraint;k++) {
1623         /* there can be more than one constraint on a single connected component */
1624         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1625           min_index = global_indices[k];
1626           min_loc = k;
1627         }
1628       }
1629       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1630       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1631       aux_primal_minloc[total_primal_vertices]=min_loc;
1632       total_primal_vertices++;
1633     }
1634   }
1635   /* determine if a QR strategy is needed for change of basis */
1636   qr_needed = PETSC_FALSE;
1637   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1638   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1639     if (PetscBTLookup(change_basis,i)) {
1640       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1641       j = 0;
1642       for (k=0;k<size_of_constraint;k++) {
1643         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1644           j++;
1645         }
1646       }
1647       /* found more than one primal dof on the cc */
1648       if (j > 1) {
1649         PetscBTSet(qr_needed_idx,i);
1650         qr_needed = PETSC_TRUE;
1651       }
1652     }
1653   }
1654   /* free workspace */
1655   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1656 
1657   /* permute indices in order to have a sorted set of vertices */
1658   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1659 
1660   /* nonzero structure of constraint matrix */
1661   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1662   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1663   j=total_primal_vertices;
1664   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1665     if (!PetscBTLookup(change_basis,i)) {
1666       nnz[j]=temp_indices[i+1]-temp_indices[i];
1667       j++;
1668     }
1669   }
1670   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1671   ierr = PetscFree(nnz);CHKERRQ(ierr);
1672   /* set values in constraint matrix */
1673   for (i=0;i<total_primal_vertices;i++) {
1674     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1675   }
1676   total_counts = total_primal_vertices;
1677   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1678     if (!PetscBTLookup(change_basis,i)) {
1679       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1680       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1681       total_counts++;
1682     }
1683   }
1684   /* assembling */
1685   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687   /*
1688   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1689   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1690   */
1691   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1692   if (pcbddc->use_change_of_basis) {
1693     /* dual and primal dofs on a single cc */
1694     PetscInt     dual_dofs,primal_dofs;
1695     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1696     PetscInt     primal_counter;
1697     /* working stuff for GEQRF */
1698     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1699     PetscBLASInt lqr_work;
1700     /* working stuff for UNGQR */
1701     PetscScalar  *gqr_work,lgqr_work_t;
1702     PetscBLASInt lgqr_work;
1703     /* working stuff for TRTRS */
1704     PetscScalar  *trs_rhs;
1705     PetscBLASInt Blas_NRHS;
1706     /* pointers for values insertion into change of basis matrix */
1707     PetscInt     *start_rows,*start_cols;
1708     PetscScalar  *start_vals;
1709     /* working stuff for values insertion */
1710     PetscBT      is_primal;
1711 
1712     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1713     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1714     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1715     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1716     /* work arrays */
1717     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1718     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1719     /* nonzeros per row */
1720     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1721       if (PetscBTLookup(change_basis,i)) {
1722         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1723         if (PetscBTLookup(qr_needed_idx,i)) {
1724           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1725         } else {
1726           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1727           /* get local primal index on the cc */
1728           j = 0;
1729           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1730           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1731         }
1732       }
1733     }
1734     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1735     ierr = PetscFree(nnz);CHKERRQ(ierr);
1736     /* Set initial identity in the matrix */
1737     for (i=0;i<pcis->n_B;i++) {
1738       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1739     }
1740 
1741     if (pcbddc->dbg_flag) {
1742       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1743       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1744     }
1745 
1746 
1747     /* Now we loop on the constraints which need a change of basis */
1748     /*
1749        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1750        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1751 
1752        Basic blocks of change of basis matrix T computed
1753 
1754           - Using the following block transformation if there is only a primal dof on the cc
1755             (in the example, primal dof is the last one of the edge in LOCAL ordering
1756              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1757             | 1        0   ...        0     1 |
1758             | 0        1   ...        0     1 |
1759             |              ...                |
1760             | 0        ...            1     1 |
1761             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1762 
1763           - via QR decomposition of constraints otherwise
1764     */
1765     if (qr_needed) {
1766       /* space to store Q */
1767       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1768       /* first we issue queries for optimal work */
1769       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1770       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1771       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1772       lqr_work = -1;
1773       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1774       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1775       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1776       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1777       lgqr_work = -1;
1778       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1779       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1780       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1781       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1782       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1783       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1784       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1785       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1786       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1787       /* array to store scaling factors for reflectors */
1788       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1789       /* array to store rhs and solution of triangular solver */
1790       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1791       /* allocating workspace for check */
1792       if (pcbddc->dbg_flag) {
1793         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1794       }
1795     }
1796     /* array to store whether a node is primal or not */
1797     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1798     for (i=0;i<total_primal_vertices;i++) {
1799       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1800     }
1801 
1802     /* loop on constraints and see whether or not they need a change of basis and compute it */
1803     /* -> using implicit ordering contained in temp_indices data */
1804     total_counts = pcbddc->n_vertices;
1805     primal_counter = total_counts;
1806     while (total_counts<pcbddc->local_primal_size) {
1807       primal_dofs = 1;
1808       if (PetscBTLookup(change_basis,total_counts)) {
1809         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1810         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1811           primal_dofs++;
1812         }
1813         /* get constraint info */
1814         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1815         dual_dofs = size_of_constraint-primal_dofs;
1816 
1817         if (pcbddc->dbg_flag) {
1818           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1819         }
1820 
1821         if (primal_dofs > 1) { /* QR */
1822 
1823           /* copy quadrature constraints for change of basis check */
1824           if (pcbddc->dbg_flag) {
1825             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1826           }
1827           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1828           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1829 
1830           /* compute QR decomposition of constraints */
1831           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1832           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1833           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1834           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1835           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1836           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1837           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1838 
1839           /* explictly compute R^-T */
1840           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1841           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1842           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1843           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1844           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1845           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1846           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1847           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1848           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1849           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1850 
1851           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1852           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1853           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1854           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1855           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1856           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1857           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1858           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1859           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1860 
1861           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1862              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1863              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1864           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1865           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1866           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1868           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1869           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1870           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1871           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1872           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1873           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1874 
1875           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1876           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1877           /* insert cols for primal dofs */
1878           for (j=0;j<primal_dofs;j++) {
1879             start_vals = &qr_basis[j*size_of_constraint];
1880             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1881             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1882           }
1883           /* insert cols for dual dofs */
1884           for (j=0,k=0;j<dual_dofs;k++) {
1885             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1886               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1887               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1888               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1889               j++;
1890             }
1891           }
1892 
1893           /* check change of basis */
1894           if (pcbddc->dbg_flag) {
1895             PetscInt   ii,jj;
1896             PetscBool valid_qr=PETSC_TRUE;
1897             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1898             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1899             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1900             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1901             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1902             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1903             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1904             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1905             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1906             for (jj=0;jj<size_of_constraint;jj++) {
1907               for (ii=0;ii<primal_dofs;ii++) {
1908                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1909                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1910               }
1911             }
1912             if (!valid_qr) {
1913               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1914               for (jj=0;jj<size_of_constraint;jj++) {
1915                 for (ii=0;ii<primal_dofs;ii++) {
1916                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1917                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1918                   }
1919                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1920                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1921                   }
1922                 }
1923               }
1924             } else {
1925               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1926             }
1927           }
1928         } else { /* simple transformation block */
1929           PetscInt row,col;
1930           PetscScalar val;
1931           for (j=0;j<size_of_constraint;j++) {
1932             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1933             if (!PetscBTLookup(is_primal,row)) {
1934               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1935               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1936               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1937             } else {
1938               for (k=0;k<size_of_constraint;k++) {
1939                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1940                 if (row != col) {
1941                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1942                 } else {
1943                   val = 1.0;
1944                 }
1945                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1946               }
1947             }
1948           }
1949         }
1950         /* increment primal counter */
1951         primal_counter += primal_dofs;
1952       } else {
1953         if (pcbddc->dbg_flag) {
1954           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1955         }
1956       }
1957       /* increment constraint counter total_counts */
1958       total_counts += primal_dofs;
1959     }
1960 
1961     /* free workspace */
1962     if (qr_needed) {
1963       if (pcbddc->dbg_flag) {
1964         ierr = PetscFree(work);CHKERRQ(ierr);
1965       }
1966       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1967       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1968       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1969       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1970       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1971     }
1972     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1973     /* assembling */
1974     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1975     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1976     /*
1977     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1978     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1979     */
1980   }
1981 
1982   /* flush dbg viewer */
1983   if (pcbddc->dbg_flag) {
1984     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1985   }
1986 
1987   /* free workspace */
1988   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1989   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1990   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1991   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1992   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1993   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1994   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1995   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1996   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1997   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2003 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2004 {
2005   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2006   PC_IS       *pcis = (PC_IS*)pc->data;
2007   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2008   PetscInt    bs,ierr,i,vertex_size;
2009   PetscViewer viewer=pcbddc->dbg_viewer;
2010 
2011   PetscFunctionBegin;
2012   /* Init local Graph struct */
2013   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2014 
2015   /* Check validity of the csr graph passed in by the user */
2016   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2017     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2018   }
2019 
2020   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2021   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2022     Mat mat_adj;
2023     const PetscInt *xadj,*adjncy;
2024     PetscBool flg_row=PETSC_TRUE;
2025 
2026     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2027     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2028     if (!flg_row) {
2029       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2030     }
2031     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2032     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2033     if (!flg_row) {
2034       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2035     }
2036     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2037   }
2038 
2039   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2040   vertex_size = 1;
2041   if (!pcbddc->user_provided_isfordofs) {
2042     if (!pcbddc->n_ISForDofs) {
2043       IS *custom_ISForDofs;
2044 
2045       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2046       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2047       for (i=0;i<bs;i++) {
2048         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2049       }
2050       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2051       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2052       /* remove my references to IS objects */
2053       for (i=0;i<bs;i++) {
2054         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2055       }
2056       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2057     }
2058   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2059     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2060   }
2061 
2062   /* Setup of Graph */
2063   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2064     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2065   }
2066   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2067     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2068   }
2069   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2070 
2071   /* Graph's connected components analysis */
2072   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2073 
2074   /* print some info to stdout */
2075   if (pcbddc->dbg_flag) {
2076     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2077   }
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 #undef __FUNCT__
2082 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2083 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2084 {
2085   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2086   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   n = 0;
2091   vertices = 0;
2092   if (pcbddc->ConstraintMatrix) {
2093     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2094     for (i=0;i<local_primal_size;i++) {
2095       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2096       if (size_of_constraint == 1) n++;
2097       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2098     }
2099     if (vertices_idx) {
2100       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2101       n = 0;
2102       for (i=0;i<local_primal_size;i++) {
2103         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2104         if (size_of_constraint == 1) {
2105           vertices[n++]=row_cmat_indices[0];
2106         }
2107         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2108       }
2109     }
2110   }
2111   *n_vertices = n;
2112   if (vertices_idx) *vertices_idx = vertices;
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2118 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2119 {
2120   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2121   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2122   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2123   PetscBT        touched;
2124   PetscErrorCode ierr;
2125 
2126     /* This function assumes that the number of local constraints per connected component
2127        is not greater than the number of nodes defined for the connected component
2128        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2129   PetscFunctionBegin;
2130   n = 0;
2131   constraints_index = 0;
2132   if (pcbddc->ConstraintMatrix) {
2133     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2134     max_size_of_constraint = 0;
2135     for (i=0;i<local_primal_size;i++) {
2136       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2137       if (size_of_constraint > 1) {
2138         n++;
2139       }
2140       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2141       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2142     }
2143     if (constraints_idx) {
2144       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2145       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2146       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2147       n = 0;
2148       for (i=0;i<local_primal_size;i++) {
2149         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2150         if (size_of_constraint > 1) {
2151           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2152           /* find first untouched local node */
2153           j = 0;
2154           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2155           min_index = row_cmat_global_indices[j];
2156           min_loc = j;
2157           /* search the minimum among nodes not yet touched on the connected component
2158              since there can be more than one constraint on a single cc */
2159           for (j=1;j<size_of_constraint;j++) {
2160             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2161               min_index = row_cmat_global_indices[j];
2162               min_loc = j;
2163             }
2164           }
2165           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2166           constraints_index[n++] = row_cmat_indices[min_loc];
2167         }
2168         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2169       }
2170       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2171       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2172     }
2173   }
2174   *n_constraints = n;
2175   if (constraints_idx) *constraints_idx = constraints_index;
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /* the next two functions has been adapted from pcis.c */
2180 #undef __FUNCT__
2181 #define __FUNCT__ "PCBDDCApplySchur"
2182 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2183 {
2184   PetscErrorCode ierr;
2185   PC_IS          *pcis = (PC_IS*)(pc->data);
2186 
2187   PetscFunctionBegin;
2188   if (!vec2_B) { vec2_B = v; }
2189   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2190   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2191   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2192   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2193   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 #undef __FUNCT__
2198 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2199 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2200 {
2201   PetscErrorCode ierr;
2202   PC_IS          *pcis = (PC_IS*)(pc->data);
2203 
2204   PetscFunctionBegin;
2205   if (!vec2_B) { vec2_B = v; }
2206   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2207   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2208   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2209   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2210   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "PCBDDCSubsetNumbering"
2216 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2217 {
2218   Vec            local_vec,global_vec;
2219   IS             seqis,paris;
2220   VecScatter     scatter_ctx;
2221   PetscScalar    *array;
2222   PetscInt       *temp_global_dofs;
2223   PetscScalar    globalsum;
2224   PetscInt       i,j,s;
2225   PetscInt       nlocals,first_index,old_index,max_local;
2226   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2227   PetscMPIInt    *dof_sizes,*dof_displs;
2228   PetscBool      first_found;
2229   PetscErrorCode ierr;
2230 
2231   PetscFunctionBegin;
2232   /* mpi buffers */
2233   MPI_Comm_size(comm,&size_prec_comm);
2234   MPI_Comm_rank(comm,&rank_prec_comm);
2235   j = ( !rank_prec_comm ? size_prec_comm : 0);
2236   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2237   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2238   /* get maximum size of subset */
2239   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2240   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2241   max_local = 0;
2242   if (n_local_dofs) {
2243     max_local = temp_global_dofs[0];
2244     for (i=1;i<n_local_dofs;i++) {
2245       if (max_local < temp_global_dofs[i] ) {
2246         max_local = temp_global_dofs[i];
2247       }
2248     }
2249   }
2250   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2251   max_global++;
2252   max_local = 0;
2253   if (n_local_dofs) {
2254     max_local = local_dofs[0];
2255     for (i=1;i<n_local_dofs;i++) {
2256       if (max_local < local_dofs[i] ) {
2257         max_local = local_dofs[i];
2258       }
2259     }
2260   }
2261   max_local++;
2262   /* allocate workspace */
2263   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2264   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2265   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2266   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2267   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2268   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2269   /* create scatter */
2270   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2271   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2272   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2273   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2274   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2275   /* init array */
2276   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2277   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2278   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2279   if (local_dofs_mult) {
2280     for (i=0;i<n_local_dofs;i++) {
2281       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2282     }
2283   } else {
2284     for (i=0;i<n_local_dofs;i++) {
2285       array[local_dofs[i]]=1.0;
2286     }
2287   }
2288   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2289   /* scatter into global vec and get total number of global dofs */
2290   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2291   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2292   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2293   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2294   /* Fill global_vec with cumulative function for global numbering */
2295   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2296   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2297   nlocals = 0;
2298   first_index = -1;
2299   first_found = PETSC_FALSE;
2300   for (i=0;i<s;i++) {
2301     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2302       first_found = PETSC_TRUE;
2303       first_index = i;
2304     }
2305     nlocals += (PetscInt)PetscRealPart(array[i]);
2306   }
2307   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2308   if (!rank_prec_comm) {
2309     dof_displs[0]=0;
2310     for (i=1;i<size_prec_comm;i++) {
2311       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2312     }
2313   }
2314   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2315   if (first_found) {
2316     array[first_index] += (PetscScalar)nlocals;
2317     old_index = first_index;
2318     for (i=first_index+1;i<s;i++) {
2319       if (PetscRealPart(array[i]) > 0.0) {
2320         array[i] += array[old_index];
2321         old_index = i;
2322       }
2323     }
2324   }
2325   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2326   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2327   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2328   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2329   /* get global ordering of local dofs */
2330   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2331   if (local_dofs_mult) {
2332     for (i=0;i<n_local_dofs;i++) {
2333       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2334     }
2335   } else {
2336     for (i=0;i<n_local_dofs;i++) {
2337       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2338     }
2339   }
2340   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2341   /* free workspace */
2342   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2343   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2344   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2345   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2346   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2347   /* return pointer to global ordering of local dofs */
2348   *global_numbering_subset = temp_global_dofs;
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2354 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2355 {
2356   PetscInt       i,j;
2357   PetscScalar    *alphas;
2358   PetscErrorCode ierr;
2359 
2360   PetscFunctionBegin;
2361   /* this implements stabilized Gram-Schmidt */
2362   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2363   for (i=0;i<n;i++) {
2364     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2365     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2366     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2367   }
2368   ierr = PetscFree(alphas);CHKERRQ(ierr);
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatConvert_IS_AIJ"
2375 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2376 {
2377   Mat new_mat;
2378   Mat_IS *matis = (Mat_IS*)(mat->data);
2379   /* info on mat */
2380   PetscInt bs,rows,cols;
2381   PetscInt lrows,lcols;
2382   PetscInt local_rows,local_cols;
2383   PetscMPIInt nsubdomains;
2384   /* preallocation */
2385   Vec vec_dnz,vec_onz;
2386   PetscScalar *my_dnz,*my_onz,*array;
2387   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2388   PetscInt  index_row,index_col,owner;
2389   PetscInt  *local_indices,*global_indices;
2390   /* work */
2391   PetscInt i,j;
2392   PetscErrorCode ierr;
2393 
2394   PetscFunctionBegin;
2395   /* CHECKS */
2396   /* get info from mat */
2397   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2398   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2399   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2400   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2401 
2402   /* MAT_INITIAL_MATRIX */
2403   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2404   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2405   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2406   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2407   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2408   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2409 
2410   /*
2411     preallocation
2412   */
2413 
2414   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2415   /*
2416      Some vectors are needed to sum up properly on shared interface dofs.
2417      Note that preallocation is not exact, since it overestimates nonzeros
2418   */
2419 /*
2420   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2421   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2422   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2423   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2424 */
2425   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2426   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2427   /* All processes need to compute entire row ownership */
2428   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2429   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2430   for (i=0;i<nsubdomains;i++) {
2431     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2432       row_ownership[j]=i;
2433     }
2434   }
2435   /* map indices of local mat to global values */
2436   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2437   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2438   for (i=0;i<local_rows;i++) local_indices[i]=i;
2439   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2440   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2441 
2442   /*
2443      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2444      then, they will be summed up properly. This way, preallocation is always sufficient
2445   */
2446   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2447   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2448   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2449   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2450   for (i=0;i<local_rows;i++) {
2451     index_row = global_indices[i];
2452     for (j=i;j<local_rows;j++) {
2453       owner = row_ownership[index_row];
2454       index_col = global_indices[j];
2455       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2456         my_dnz[i] += 1.0;
2457       } else { /* offdiag block */
2458         my_onz[i] += 1.0;
2459       }
2460       /* same as before, interchanging rows and cols */
2461       if (i != j) {
2462         owner = row_ownership[index_col];
2463         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2464           my_dnz[j] += 1.0;
2465         } else {
2466           my_onz[j] += 1.0;
2467         }
2468       }
2469     }
2470   }
2471   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2472   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2473   if (local_rows) { /* multilevel guard */
2474     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2475     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2476   }
2477   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2478   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2479   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2480   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2481   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2482   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2483   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2484 
2485   /* set computed preallocation in dnz and onz */
2486   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2487   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2488   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2489   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2490   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2491   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2492   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2493   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2494 
2495   /* Resize preallocation if overestimated */
2496   for (i=0;i<lrows;i++) {
2497     dnz[i] = PetscMin(dnz[i],lcols);
2498     onz[i] = PetscMin(onz[i],cols-lcols);
2499   }
2500   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2501   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2502 
2503   /*
2504     Set values. Very Basic.
2505   */
2506   for (i=0;i<local_rows;i++) {
2507     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2508     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2509     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2510     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2511     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2512   }
2513   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2514   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2515   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2516 
2517   /* get back mat */
2518   *M = new_mat;
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatISSubassemble_Private"
2524 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2525 {
2526   Mat             subdomain_adj;
2527   IS              new_ranks,ranks_send_to;
2528   MatPartitioning partitioner;
2529   Mat_IS          *matis;
2530   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2531   PetscInt        prank;
2532   PetscMPIInt     size,rank,color;
2533   PetscInt        *xadj,*adjncy,*oldranks;
2534   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2535   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2536   PetscErrorCode  ierr;
2537   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2538   PetscSubcomm    subcomm;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2542   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2543   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2544 
2545   /* Get info on mapping */
2546   matis = (Mat_IS*)(mat->data);
2547   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2548   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2549 
2550   /* build local CSR graph of subdomains' connectivity */
2551   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2552   xadj[0] = 0;
2553   xadj[1] = PetscMax(n_neighs-1,0);
2554   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2555   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2556 
2557   if (threshold) {
2558     PetscInt* count,min_threshold;
2559     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2560     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2561     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2562       for (j=0;j<n_shared[i];j++) {
2563         count[shared[i][j]] += 1;
2564       }
2565     }
2566     /* adapt threshold since we dont want to lose significant connections */
2567     min_threshold = n_neighs;
2568     for (i=1;i<n_neighs;i++) {
2569       for (j=0;j<n_shared[i];j++) {
2570         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2571       }
2572     }
2573     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2574     threshold = PetscMax(min_threshold+1,threshold);
2575     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2576     xadj[1] = 0;
2577     for (i=1;i<n_neighs;i++) {
2578       for (j=0;j<n_shared[i];j++) {
2579         if (count[shared[i][j]] < threshold) {
2580           adjncy[xadj[1]] = neighs[i];
2581           adjncy_wgt[xadj[1]] = n_shared[i];
2582           xadj[1]++;
2583           break;
2584         }
2585       }
2586     }
2587     ierr = PetscFree(count);CHKERRQ(ierr);
2588   } else {
2589     if (xadj[1]) {
2590       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2591       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2592     }
2593   }
2594   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2595   if (use_square) {
2596     for (i=0;i<xadj[1];i++) {
2597       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2598     }
2599   }
2600   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2601 
2602   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2603 
2604   /*
2605     Restrict work on active processes only.
2606   */
2607   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2608   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2609   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2610   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2611   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2612   if (color) {
2613     ierr = PetscFree(xadj);CHKERRQ(ierr);
2614     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2615     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2616   } else {
2617     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2618     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2619     prank = rank;
2620     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2621     for (i=0;i<size;i++) {
2622       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2623     }
2624     for (i=0;i<xadj[1];i++) {
2625       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2626     }
2627     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2628     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2629     n_subdomains = (PetscInt)size;
2630     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2631 
2632     /* Partition */
2633     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2634     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2635     if (use_vwgt) {
2636       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2637       v_wgt[0] = local_size;
2638       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2639     }
2640     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2641     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2642     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2643     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2644     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2645 
2646     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2647     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2648     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2649     /* clean up */
2650     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2651     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2652     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2653     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2654   }
2655   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2656 
2657   /* assemble parallel IS for sends */
2658   i = 1;
2659   if (color) i=0;
2660   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2661   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2662 
2663   /* get back IS */
2664   *is_sends = ranks_send_to;
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatISSubassemble"
2672 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2673 {
2674   Mat                    local_mat,new_mat;
2675   Mat_IS                 *matis;
2676   IS                     is_sends_internal;
2677   PetscInt               rows,cols;
2678   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2679   PetscBool              ismatis,isdense;
2680   ISLocalToGlobalMapping l2gmap;
2681   PetscInt*              l2gmap_indices;
2682   const PetscInt*        is_indices;
2683   MatType                new_local_type;
2684   MatTypePrivate         new_local_type_private;
2685   /* buffers */
2686   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2687   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2688   /* MPI */
2689   MPI_Comm               comm;
2690   PetscMPIInt            n_sends,n_recvs,commsize;
2691   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2692   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2693   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2694   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2695   PetscErrorCode         ierr;
2696 
2697   PetscFunctionBegin;
2698   /* checks */
2699   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2700   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2701   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2702   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2703   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2704   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2705   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2706   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2707   PetscValidLogicalCollectiveInt(mat,bs,0);
2708   /* prepare IS for sending if not provided */
2709   if (!is_sends) {
2710     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2711     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2712   } else {
2713     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2714     is_sends_internal = is_sends;
2715   }
2716 
2717   /* get pointer of MATIS data */
2718   matis = (Mat_IS*)mat->data;
2719 
2720   /* get comm */
2721   comm = PetscObjectComm((PetscObject)mat);
2722 
2723   /* compute number of sends */
2724   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2725   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2726 
2727   /* compute number of receives */
2728   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2729   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2730   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2731   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2732   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2733   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2734   ierr = PetscFree(iflags);CHKERRQ(ierr);
2735 
2736   /* prepare send/receive buffers */
2737   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2738   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2739   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2740   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2741 
2742   /* Get data from local mat */
2743   if (!isdense) {
2744     /* TODO: See below some guidelines on how to prepare the local buffers */
2745     /*
2746        send_buffer_vals should contain the raw values of the local matrix
2747        send_buffer_idxs should contain:
2748        - MatType_PRIVATE type
2749        - PetscInt        size_of_l2gmap
2750        - PetscInt        global_row_indices[size_of_l2gmap]
2751        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2752     */
2753   } else {
2754     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2755     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2756     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2757     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2758     send_buffer_idxs[1] = i;
2759     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2760     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2761     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2762     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2763     for (i=0;i<n_sends;i++) {
2764       ilengths_vals[is_indices[i]] = len*len;
2765       ilengths_idxs[is_indices[i]] = len+2;
2766     }
2767   }
2768   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2769   buf_size_idxs = 0;
2770   buf_size_vals = 0;
2771   for (i=0;i<n_recvs;i++) {
2772     buf_size_idxs += (PetscInt)olengths_idxs[i];
2773     buf_size_vals += (PetscInt)olengths_vals[i];
2774   }
2775   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2776   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2777 
2778   /* get new tags for clean communications */
2779   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2780   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2781 
2782   /* allocate for requests */
2783   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2784   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2785   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2786   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2787 
2788   /* communications */
2789   ptr_idxs = recv_buffer_idxs;
2790   ptr_vals = recv_buffer_vals;
2791   for (i=0;i<n_recvs;i++) {
2792     source_dest = onodes[i];
2793     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2794     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2795     ptr_idxs += olengths_idxs[i];
2796     ptr_vals += olengths_vals[i];
2797   }
2798   for (i=0;i<n_sends;i++) {
2799     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2800     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2801     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2802   }
2803   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2804   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2805 
2806   /* assemble new l2g map */
2807   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2808   ptr_idxs = recv_buffer_idxs;
2809   buf_size_idxs = 0;
2810   for (i=0;i<n_recvs;i++) {
2811     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2812     ptr_idxs += olengths_idxs[i];
2813   }
2814   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2815   ptr_idxs = recv_buffer_idxs;
2816   buf_size_idxs = 0;
2817   for (i=0;i<n_recvs;i++) {
2818     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2819     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2820     ptr_idxs += olengths_idxs[i];
2821   }
2822   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2823   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2824   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2825 
2826   /* infer new local matrix type from received local matrices type */
2827   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2828   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2829   new_local_type_private = MATAIJ_PRIVATE;
2830   new_local_type = MATSEQAIJ;
2831   if (n_recvs) {
2832     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2833     ptr_idxs = recv_buffer_idxs;
2834     for (i=0;i<n_recvs;i++) {
2835       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2836         new_local_type_private = MATAIJ_PRIVATE;
2837         break;
2838       }
2839       ptr_idxs += olengths_idxs[i];
2840     }
2841     switch (new_local_type_private) {
2842       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2843         new_local_type = MATSEQAIJ;
2844         bs = 1;
2845         break;
2846       case MATAIJ_PRIVATE:
2847         new_local_type = MATSEQAIJ;
2848         bs = 1;
2849         break;
2850       case MATBAIJ_PRIVATE:
2851         new_local_type = MATSEQBAIJ;
2852         break;
2853       case MATSBAIJ_PRIVATE:
2854         new_local_type = MATSEQSBAIJ;
2855         break;
2856       default:
2857         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2858         break;
2859     }
2860   }
2861 
2862   /* create MATIS object */
2863   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2864   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2865   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2866   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2867   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2868   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2869 
2870   /* set values */
2871   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2872   ptr_vals = recv_buffer_vals;
2873   ptr_idxs = recv_buffer_idxs;
2874   for (i=0;i<n_recvs;i++) {
2875     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2876       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2877       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2878       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2879       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2880       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2881     }
2882     ptr_idxs += olengths_idxs[i];
2883     ptr_vals += olengths_vals[i];
2884   }
2885   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2886   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2887   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2888   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2889 
2890   { /* check */
2891     Vec       lvec,rvec;
2892     PetscReal infty_error;
2893 
2894     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2895     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2896     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2897     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2898     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2899     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2900     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2901     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2902     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2903   }
2904 
2905   /* free workspace */
2906   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2907   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2908   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2909   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2910   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2911   if (isdense) {
2912     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2913     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2914   } else {
2915     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2916   }
2917   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2918   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2919   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2920   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2921   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2922   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2923   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2924   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2925   ierr = PetscFree(onodes);CHKERRQ(ierr);
2926   /* get back new mat */
2927   *mat_n = new_mat;
2928   PetscFunctionReturn(0);
2929 }
2930 
2931 #undef __FUNCT__
2932 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2933 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2934 {
2935   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2936   PC_IS                  *pcis = (PC_IS*)pc->data;
2937   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2938   MatNullSpace           CoarseNullSpace=NULL;
2939   ISLocalToGlobalMapping coarse_islg;
2940   IS                     coarse_is;
2941   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2942   PetscInt               im_active=-1,active_procs=-1;
2943   PC                     pc_temp;
2944   PCType                 coarse_pc_type;
2945   KSPType                coarse_ksp_type;
2946   PetscBool              multilevel_requested,multilevel_allowed;
2947   PetscBool              setsym,issym,isbddc,isnn;
2948   MatStructure           matstruct;
2949   PetscErrorCode         ierr;
2950 
2951   PetscFunctionBegin;
2952   /* Assign global numbering to coarse dofs */
2953   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2954 
2955   /* infer some info from user */
2956   issym = PETSC_FALSE;
2957   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2958   multilevel_allowed = PETSC_FALSE;
2959   multilevel_requested = PETSC_FALSE;
2960   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2961   if (multilevel_requested) {
2962     /* count "active processes" */
2963     im_active = !!(pcis->n);
2964     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2965     if (active_procs/pcbddc->coarsening_ratio < 2) {
2966       multilevel_allowed = PETSC_FALSE;
2967     } else {
2968       multilevel_allowed = PETSC_TRUE;
2969     }
2970   }
2971 
2972   /* set defaults for coarse KSP and PC */
2973   if (multilevel_allowed) {
2974     if (issym) {
2975       coarse_ksp_type = KSPRICHARDSON;
2976     } else {
2977       coarse_ksp_type = KSPCHEBYSHEV;
2978     }
2979     coarse_pc_type = PCBDDC;
2980   } else {
2981     coarse_ksp_type = KSPPREONLY;
2982     coarse_pc_type = PCREDUNDANT;
2983   }
2984 
2985   /* create the coarse KSP object only once with defaults */
2986   if (!pcbddc->coarse_ksp) {
2987     char prefix[256],str_level[3];
2988     size_t len;
2989     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2990     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2991     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2992     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2993     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2994     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2995     /* prefix */
2996     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2997     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2998     if (!pcbddc->current_level) {
2999       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3000       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3001     } else {
3002       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3003       if (pcbddc->current_level>1) len -= 2;
3004       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3005       *(prefix+len)='\0';
3006       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3007       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3008     }
3009     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3010   }
3011   /* allow user customization */
3012   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3013   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3014   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3015 
3016   /* get some info after set from options */
3017   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3018   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3019   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3020   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3021     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3022     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3023     isbddc = PETSC_FALSE;
3024   }
3025 
3026   /* propagate BDDC info to the next level */
3027   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3028   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3029   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3030 
3031   /* creates temporary MATIS object for coarse matrix */
3032   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3033   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3034   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3035   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3036   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3037   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3038   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3040   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3041   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3042 
3043   /* assemble coarse matrix */
3044   if (isbddc || isnn) {
3045     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3046   } else {
3047     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3048   }
3049   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3050 
3051   /* create local to global scatters for coarse problem */
3052   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3053   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3054   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3055 
3056   /* propagate symmetry info to coarse matrix */
3057   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3058 
3059   /* Compute coarse null space (special handling by BDDC only) */
3060   if (pcbddc->NullSpace) {
3061     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3062     if (isbddc) {
3063       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3064     } else {
3065       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3066     }
3067   }
3068 
3069   /* set operators */
3070   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3071   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3072 
3073   /* additional KSP customization */
3074   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3075   if (max_it < 5) {
3076     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3077   }
3078   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3079 
3080 
3081   /* print some info if requested */
3082   if (pcbddc->dbg_flag) {
3083     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3084     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3085     if (!multilevel_allowed) {
3086       if (multilevel_requested) {
3087         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3088       } else if (pcbddc->max_levels) {
3089         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3090       }
3091     }
3092     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3093     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3094   }
3095 
3096   /* setup coarse ksp */
3097   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3098   if (pcbddc->dbg_flag) {
3099     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3100     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3101     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3102     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3103   }
3104 
3105   /* Check coarse problem if requested */
3106   if (pcbddc->dbg_flag) {
3107     KSP       check_ksp;
3108     KSPType   check_ksp_type;
3109     PC        check_pc;
3110     Vec       check_vec;
3111     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3112     PetscInt  its;
3113     PetscBool ispreonly,compute;
3114 
3115     /* Create ksp object suitable for estimation of extreme eigenvalues */
3116     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3117     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3118     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3119     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3120     if (ispreonly) {
3121       check_ksp_type = KSPPREONLY;
3122       compute = PETSC_FALSE;
3123     } else {
3124       if (issym) check_ksp_type = KSPCG;
3125       else check_ksp_type = KSPGMRES;
3126       compute = PETSC_TRUE;
3127     }
3128     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3129     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3130     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3131     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3132     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3133     /* create random vec */
3134     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3135     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3136     if (CoarseNullSpace) {
3137       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3138     }
3139     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3140     /* solve coarse problem */
3141     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3142     if (CoarseNullSpace) {
3143       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3144     }
3145     /* check coarse problem residual error */
3146     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3147     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3148     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3149     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3150     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3151     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3152     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3153     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3154     /* get eigenvalue estimation if preonly has not been requested */
3155     if (!ispreonly) {
3156       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3157       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3158       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3159     }
3160     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3161     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3162   }
3163   /* free memory */
3164   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3165   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 #undef __FUNCT__
3170 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3171 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3172 {
3173   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3174   PC_IS*         pcis = (PC_IS*)pc->data;
3175   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3176   PetscInt       i,j,coarse_size;
3177   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   /* get indices in local ordering for vertices and constraints */
3182   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3183   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3184   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3185   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3186   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3187   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3188   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3189 
3190   /* Compute global number of coarse dofs */
3191   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3192 
3193   /* check numbering */
3194   if (pcbddc->dbg_flag) {
3195     PetscScalar coarsesum,*array;
3196 
3197     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3198     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3199     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3200     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3201     for (i=0;i<pcbddc->local_primal_size;i++) {
3202       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3203     }
3204     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3205     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3206     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3207     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3208     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3209     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3210     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3211     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3212     for (i=0;i<pcis->n;i++) {
3213       if (array[i] == 1.0) {
3214         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3215       }
3216     }
3217     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3218     for (i=0;i<pcis->n;i++) {
3219       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3220     }
3221     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3222     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3223     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3224     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3225     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3226     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3227     if (pcbddc->dbg_flag > 1) {
3228       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3229       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3230       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3231       for (i=0;i<pcbddc->local_primal_size;i++) {
3232         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3233       }
3234       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3235     }
3236     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3237   }
3238   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3239   /* get back data */
3240   *coarse_size_n = coarse_size;
3241   *local_primal_indices_n = local_primal_indices;
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "PCBDDCGlobalToLocal"
3247 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3248 {
3249   PC_IS*         pcis = (PC_IS*)pc->data;
3250   IS             localis_t;
3251   PetscInt       i,lsize,*idxs;
3252   PetscScalar    *vals;
3253   PetscErrorCode ierr;
3254 
3255   PetscFunctionBegin;
3256   /* get dirichlet indices in local ordering exploiting local to global map */
3257   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3258   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3259   for (i=0;i<lsize;i++) vals[i] = 1.0;
3260   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3261   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3262   ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3263   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3264   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3265   ierr = PetscFree(vals);CHKERRQ(ierr);
3266   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3267   /* now compute dirichlet set in local ordering */
3268   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3269   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3270   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3271   for (i=0,lsize=0;i<pcis->n;i++) {
3272     if (vals[i] == 1.0) {
3273       lsize++;
3274     }
3275   }
3276   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3277   for (i=0,lsize=0;i<pcis->n;i++) {
3278     if (vals[i] == 1.0) {
3279       idxs[lsize++] = i;
3280     }
3281   }
3282   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3283   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3284   *localis = localis_t;
3285   PetscFunctionReturn(0);
3286 }
3287