xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 785d12438418abfe1dc590cf0e72c5e5ca69a49b)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   for (i=0;i<pcbddc->n_ISForDofs;i++) {
62     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
63   }
64   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
65   pcbddc->n_ISForDofs = 0;
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
79   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
80   PetscFunctionReturn(0);
81 }
82 
83 #undef __FUNCT__
84 #define __FUNCT__ "PCBDDCResetSolvers"
85 PetscErrorCode PCBDDCResetSolvers(PC pc)
86 {
87   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
92   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   PetscFunctionReturn(0);
109 }
110 
111 #undef __FUNCT__
112 #define __FUNCT__ "PCBDDCCreateWorkVectors"
113 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
114 {
115   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
116   PC_IS          *pcis = (PC_IS*)pc->data;
117   VecType        impVecType;
118   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
123   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
124   local_primal_size = n_constraints+n_vertices;
125   n_R = pcis->n-n_vertices;
126   /* local work vectors */
127   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
128   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
129   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
130   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
131   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
132   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
133   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
134   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
135   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
136   if (n_constraints) {
137     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
138     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
139     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 #undef __FUNCT__
145 #define __FUNCT__ "PCBDDCSetUpCorrection"
146 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
147 {
148   PetscErrorCode         ierr;
149   /* pointers to pcis and pcbddc */
150   PC_IS*                 pcis = (PC_IS*)pc->data;
151   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
152   /* submatrices of local problem */
153   Mat                    A_RV,A_VR,A_VV;
154   /* working matrices */
155   Mat                    M1,M2,M3,C_CR;
156   /* working vectors */
157   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
158   /* additional working stuff */
159   IS                     is_aux;
160   ISLocalToGlobalMapping BtoNmap;
161   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
162   const PetscScalar      *array,*row_cmat_values;
163   const PetscInt         *row_cmat_indices,*idx_R_local;
164   PetscInt               *vertices,*idx_V_B,*auxindices;
165   PetscInt               n_vertices,n_constraints,size_of_constraint;
166   PetscInt               i,j,n_R,n_D,n_B;
167   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
168   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
169   MatType                impMatType;
170   /* some shortcuts to scalars */
171   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
172   /* for debugging purposes */
173   PetscReal              *coarsefunctions_errors,*constraints_errors;
174 
175   PetscFunctionBegin;
176   /* get number of vertices and their local indices */
177   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
178   n_constraints = pcbddc->local_primal_size-n_vertices;
179   /* Set Non-overlapping dimensions */
180   n_B = pcis->n_B; n_D = pcis->n - n_B;
181   n_R = pcis->n-n_vertices;
182 
183   /* Set types for local objects needed by BDDC precondtioner */
184   impMatType = MATSEQDENSE;
185 
186   /* Allocating some extra storage just to be safe */
187   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
188   for (i=0;i<pcis->n;i++) auxindices[i]=i;
189 
190   /* vertices in boundary numbering */
191   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
193   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
194   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
195   if (i != n_vertices) {
196     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
197   }
198 
199   /* Precompute stuffs needed for preprocessing and application of BDDC*/
200   if (n_constraints) {
201     /* work vectors */
202     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
203     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
204     /* auxiliary matrices */
205     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
206     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
207     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
208     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
209 
210     /* Extract constraints on R nodes: C_{CR}  */
211     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
212     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
213     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
214 
215     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
216     for (i=0;i<n_constraints;i++) {
217       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
218       /* Get row of constraint matrix in R numbering */
219       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
221       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
222       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
223       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
224       /* Solve for row of constraint matrix in R numbering */
225       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
226       /* Set values in local_auxmat2 */
227       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
229       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
230     }
231     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
234 
235     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
236     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
237     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
238     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
239     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
240     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
241     ierr = MatSetUp(M1);CHKERRQ(ierr);
242     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
243     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
244     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
245     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
246     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
247     ierr = MatDestroy(&M2);CHKERRQ(ierr);
248     ierr = MatDestroy(&M3);CHKERRQ(ierr);
249     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
250     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
251   }
252 
253   /* Get submatrices from subdomain matrix */
254   if (n_vertices) {
255     PetscInt ibs,mbs;
256     PetscBool issbaij;
257     Mat newmat;
258 
259     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
260     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
261     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
262     if (ibs != mbs) { /* need to convert to SEQAIJ */
263       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
265       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
266       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
267       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
268     } else {
269       /* this is safe */
270       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
271       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
272       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
273         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
274         /* which of the two approaches is faster? */
275         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
277         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
278         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
279         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
280       } else {
281         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
282         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
283       }
284     }
285     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
286     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
287     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
288   }
289 
290   /* Matrix of coarse basis functions (local) */
291   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
292   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
293   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
294   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
295   if (pcbddc->switch_static || pcbddc->dbg_flag) {
296     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
297     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
298     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
299     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
300   }
301 
302   if (pcbddc->dbg_flag) {
303     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
304     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
305     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
306   }
307   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
308   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
309 
310   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
311 
312   /* vertices */
313   for (i=0;i<n_vertices;i++) {
314     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
315     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
316     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
317     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
318     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
319     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
320     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
321     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
322     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
323     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
324     if (n_constraints) {
325       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
326       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
327       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
328     }
329     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
330     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
331 
332     /* Set values in coarse basis function and subdomain part of coarse_mat */
333     /* coarse basis functions */
334     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
335     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
336     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
337     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
339     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
340     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
341     if (pcbddc->switch_static || pcbddc->dbg_flag) {
342       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
343       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
344       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
346       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
347     }
348     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
349     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
351     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
352     if (n_constraints) {
353       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
354       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
355       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
356     }
357 
358     /* check */
359     if (pcbddc->dbg_flag) {
360       /* assemble subdomain vector on local nodes */
361       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
362       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
365       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
366       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
367       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
368       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
369       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
370       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
372       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
373       if (n_constraints) {
374         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
375         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
376         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
377       }
378       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
380       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
381       /* check saddle point solution */
382       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
383       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
384       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
385       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
386       /* shift by the identity matrix */
387       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
388       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
389       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
390       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
391     }
392   }
393 
394   /* constraints */
395   for (i=0;i<n_constraints;i++) {
396     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
397     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
398     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
399     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
400     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
401     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
402     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
403     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
404     if (n_vertices) {
405       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
406     }
407     /* Set values in coarse basis function and subdomain part of coarse_mat */
408     /* coarse basis functions */
409     j = i+n_vertices; /* don't touch this! */
410     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
411     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
412     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
413     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
415     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
416     if (pcbddc->switch_static || pcbddc->dbg_flag) {
417       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
418       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
419       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
421       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
422     }
423     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
424     if (n_vertices) {
425       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
426       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
428     }
429     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
430     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
431     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
432 
433     if (pcbddc->dbg_flag) {
434       /* assemble subdomain vector on nodes */
435       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
436       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
438       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
439       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
440       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
441       /* assemble subdomain vector of lagrange multipliers */
442       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
443       if (n_vertices) {
444         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
445         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
446         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
447       }
448       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
450       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
451       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
453       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
454       /* check saddle point solution */
455       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
456       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
457       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
458       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
459       /* shift by the identity matrix */
460       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
461       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
462       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
463       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
464     }
465   }
466   /* call assembling routines for local coarse basis */
467   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469   if (pcbddc->switch_static || pcbddc->dbg_flag) {
470     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
471     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
472   }
473 
474   /* compute other basis functions for non-symmetric problems */
475   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
476   if (!setsym || (setsym && !issym)) {
477     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
478     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
479     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
480     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
481     if (pcbddc->switch_static || pcbddc->dbg_flag) {
482       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
483       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
484       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
485       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
486     }
487     for (i=0;i<pcbddc->local_primal_size;i++) {
488       if (n_constraints) {
489         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
490         for (j=0;j<n_constraints;j++) {
491           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
492         }
493         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
494         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
495       }
496       if (i<n_vertices) {
497         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
498         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
499         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
500         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
501         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
502         if (n_constraints) {
503           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
504         }
505       } else {
506         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
507       }
508       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
509       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
510       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
511       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
512       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
514       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
515       if (i<n_vertices) {
516         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
517       }
518       if (pcbddc->switch_static || pcbddc->dbg_flag) {
519         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
520         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
521         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
523         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
524       }
525 
526       if (pcbddc->dbg_flag) {
527         /* assemble subdomain vector on nodes */
528         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
529         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
531         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
532         if (i<n_vertices) {
533           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
534         }
535         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
536         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
537         /* assemble subdomain vector of lagrange multipliers */
538         for (j=0;j<pcbddc->local_primal_size;j++) {
539           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
540         }
541         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
542         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
543         /* check saddle point solution */
544         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
545         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
546         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
547         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
548         /* shift by the identity matrix */
549         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
550         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
551         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
552         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
553       }
554     }
555     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
556     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557     if (pcbddc->switch_static || pcbddc->dbg_flag) {
558       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     }
561   }
562   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
563   /* Checking coarse_sub_mat and coarse basis functios */
564   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
565   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
566   if (pcbddc->dbg_flag) {
567     Mat         coarse_sub_mat;
568     Mat         AUXMAT,TM1,TM2,TM3,TM4;
569     Mat         coarse_phi_D,coarse_phi_B;
570     Mat         coarse_psi_D,coarse_psi_B;
571     Mat         A_II,A_BB,A_IB,A_BI;
572     MatType     checkmattype=MATSEQAIJ;
573     PetscReal   real_value;
574 
575     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
576     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
577     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
578     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
579     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
580     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
581     if (pcbddc->coarse_psi_B) {
582       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
583       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
584     }
585     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
586 
587     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
588     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
589     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
590     if (pcbddc->coarse_psi_B) {
591       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
592       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
593       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
594       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
595       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
596       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
597       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
598       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
599       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
600       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
601       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
602       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
603     } else {
604       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
605       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
606       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
607       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
608       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
609       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
610       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
611       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
612     }
613     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
614     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
615     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
616     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
617     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
618     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
619     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
620     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
621     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
622     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
623     for (i=0;i<pcbddc->local_primal_size;i++) {
624       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
625     }
626     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
627     for (i=0;i<pcbddc->local_primal_size;i++) {
628       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
629     }
630     if (pcbddc->coarse_psi_B) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
632       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
633         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
634       }
635       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
636       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
637         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
638       }
639     }
640     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
641     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
643     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
644     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
645     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
646     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
647     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
648     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
649     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
650     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
651     if (pcbddc->coarse_psi_B) {
652       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
653       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
654     }
655     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
656     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
657     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
658     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
659   }
660   /* free memory */
661   if (n_vertices) {
662     ierr = PetscFree(vertices);CHKERRQ(ierr);
663     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
664     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
665     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
666     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
667     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
668   }
669   if (n_constraints) {
670     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
671     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
674   }
675   ierr = PetscFree(auxindices);CHKERRQ(ierr);
676   /* get back data */
677   *coarse_submat_vals_n = coarse_submat_vals;
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
683 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
684 {
685   PC_IS*            pcis = (PC_IS*)(pc->data);
686   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
687   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
688   PetscBool         issbaij,isbaij;
689   /* manage repeated solves */
690   MatReuse          reuse;
691   MatStructure      matstruct;
692   PetscErrorCode    ierr;
693 
694   PetscFunctionBegin;
695   /* get mat flags */
696   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
697   reuse = MAT_INITIAL_MATRIX;
698   if (pc->setupcalled) {
699     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
700     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
701     if (matstruct == SAME_NONZERO_PATTERN) {
702       reuse = MAT_REUSE_MATRIX;
703     } else {
704       reuse = MAT_INITIAL_MATRIX;
705     }
706   }
707   if (reuse == MAT_INITIAL_MATRIX) {
708     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
709     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
710     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
711     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
712     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
713   }
714 
715   /* transform local matrices if needed */
716   if (pcbddc->use_change_of_basis) {
717     Mat         change_mat_all;
718     PetscScalar *row_cmat_values;
719     PetscInt    *row_cmat_indices;
720     PetscInt    *nnz,*is_indices,*temp_indices;
721     PetscInt    i,j,k,n_D,n_B;
722 
723     /* Get Non-overlapping dimensions */
724     n_B = pcis->n_B;
725     n_D = pcis->n-n_B;
726 
727     /* compute nonzero structure of change of basis on all local nodes */
728     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
729     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
730     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
731     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
732     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
733     k=1;
734     for (i=0;i<n_B;i++) {
735       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
736       nnz[is_indices[i]]=j;
737       if (k < j) k = j;
738       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
739     }
740     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     /* assemble change of basis matrix on the whole set of local dofs */
742     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
743     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
744     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
745     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
746     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
747     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     for (i=0;i<n_D;i++) {
749       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
750     }
751     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
752     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
753     for (i=0;i<n_B;i++) {
754       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
755       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
756       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
757       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
758     }
759     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
760     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
761     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
762     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
763     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
764     if (!issbaij && !isbaij) {
765       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
766     } else {
767       Mat work_mat;
768       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
769       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
770       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
771     }
772     /*
773     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
774     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
775     */
776     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
777     ierr = PetscFree(nnz);CHKERRQ(ierr);
778     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
779   } else {
780     /* without change of basis, the local matrix is unchanged */
781     if (!pcbddc->local_mat) {
782       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
783       pcbddc->local_mat = matis->A;
784     }
785   }
786 
787   /* get submatrices */
788   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
789   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
790   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
791   if (!issbaij) {
792     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
793     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
794   } else {
795     Mat newmat;
796     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
797     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
798     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
799     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
806 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
807 {
808   PC_IS*         pcis = (PC_IS*)(pc->data);
809   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
810   IS             is_aux1,is_aux2;
811   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
812   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
813   PetscInt       vbs,bs;
814   PetscBT        bitmask;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   /* Set Non-overlapping dimensions */
819   n_B = pcis->n_B; n_D = pcis->n - n_B;
820   /* get vertex indices from constraint matrix */
821   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
822 
823   /* create auxiliary bitmask */
824   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
825   for (i=0;i<n_vertices;i++) {
826     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
827   }
828 
829   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
830   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
831   for (i=0, n_R=0; i<pcis->n; i++) {
832     if (!PetscBTLookup(bitmask,i)) {
833       idx_R_local[n_R] = i;
834       n_R++;
835     }
836   }
837 
838   /* Block code */
839   vbs = 1;
840   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
841   if (bs>1 && !(n_vertices%bs)) {
842     PetscBool is_blocked = PETSC_TRUE;
843     PetscInt  *vary;
844     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
845     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
846     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
847     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
848     for (i=0; i<n_vertices; i++) {
849       if (vary[i]!=0 && vary[i]!=bs) {
850         is_blocked = PETSC_FALSE;
851         break;
852       }
853     }
854     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
855       vbs = bs;
856       for (i=0;i<n_R/vbs;i++) {
857         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
858       }
859     }
860     ierr = PetscFree(vary);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(vertices);CHKERRQ(ierr);
863   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
864   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
865 
866   /* print some info if requested */
867   if (pcbddc->dbg_flag) {
868     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
869     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
870     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
871     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
872     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
873     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
874     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
875   }
876 
877   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
878   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
879   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
880   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
881   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
882   for (i=0; i<n_D; i++) {
883     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
884   }
885   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
886   for (i=0, j=0; i<n_R; i++) {
887     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
888       aux_array1[j++] = i;
889     }
890   }
891   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
892   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
893   for (i=0, j=0; i<n_B; i++) {
894     if (!PetscBTLookup(bitmask,is_indices[i])) {
895       aux_array2[j++] = i;
896     }
897   }
898   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
899   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
900   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
901   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
902   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
903 
904   if (pcbddc->switch_static || pcbddc->dbg_flag) {
905     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
906     for (i=0, j=0; i<n_R; i++) {
907       if (PetscBTLookup(bitmask,idx_R_local[i])) {
908         aux_array1[j++] = i;
909       }
910     }
911     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
912     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
913     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
914   }
915   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
916   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
923 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
924 {
925   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
926   PC_IS          *pcis = (PC_IS*)pc->data;
927   PC             pc_temp;
928   Mat            A_RR;
929   Vec            vec1,vec2,vec3;
930   MatStructure   matstruct;
931   PetscScalar    m_one = -1.0;
932   PetscReal      value;
933   PetscInt       n_D,n_R,ibs,mbs;
934   PetscBool      use_exact,use_exact_reduced,issbaij;
935   PetscErrorCode ierr;
936   /* prefixes stuff */
937   char           dir_prefix[256],neu_prefix[256],str_level[3];
938   size_t         len;
939 
940   PetscFunctionBegin;
941   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
942 
943   /* compute prefixes */
944   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
945   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
946   if (!pcbddc->current_level) {
947     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
948     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
949     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
950     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
951   } else {
952     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
953     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
954     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
955     len -= 15; /* remove "pc_bddc_coarse_" */
956     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
957     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
958     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
959     *(dir_prefix+len)='\0';
960     *(neu_prefix+len)='\0';
961     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
962     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
963     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
964     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
965   }
966 
967   /* DIRICHLET PROBLEM */
968   /* Matrix for Dirichlet problem is pcis->A_II */
969   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
970   if (!pcbddc->ksp_D) { /* create object if not yet build */
971     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
972     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
973     /* default */
974     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
975     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
976     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
977     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
978     if (issbaij) {
979       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
980     } else {
981       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
982     }
983     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
984   }
985   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
986   /* Allow user's customization */
987   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
988   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
989   if (!n_D) {
990     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
991     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
992   }
993   /* Set Up KSP for Dirichlet problem of BDDC */
994   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
995   /* set ksp_D into pcis data */
996   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
997   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
998   pcis->ksp_D = pcbddc->ksp_D;
999 
1000   /* NEUMANN PROBLEM */
1001   /* Matrix for Neumann problem is A_RR -> we need to create it */
1002   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1003   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1004   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1005   if (ibs != mbs) {
1006     Mat newmat;
1007     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1008     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1009     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1010   } else {
1011     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1012   }
1013   if (!pcbddc->ksp_R) { /* create object if not yet build */
1014     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1015     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1016     /* default */
1017     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1018     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1019     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1020     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1021     if (issbaij) {
1022       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1023     } else {
1024       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1025     }
1026     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1027   }
1028   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1029   /* Allow user's customization */
1030   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1031   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1032   if (!n_R) {
1033     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1034     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1035   }
1036   /* Set Up KSP for Neumann problem of BDDC */
1037   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1038 
1039   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1040 
1041   /* Dirichlet */
1042   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1043   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1044   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1045   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1046   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1047   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1048   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1049   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1050   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1051   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1052   /* need to be adapted? */
1053   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1054   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1055   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1056   /* print info */
1057   if (pcbddc->dbg_flag) {
1058     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1059     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1060     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1061     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1062     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1063   }
1064   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1065     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1066   }
1067 
1068   /* Neumann */
1069   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1070   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1071   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1072   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1073   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1074   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1075   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1076   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1077   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1078   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1079   /* need to be adapted? */
1080   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1081   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1082   /* print info */
1083   if (pcbddc->dbg_flag) {
1084     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1085     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1086   }
1087   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1088     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1089   }
1090   /* free Neumann problem's matrix */
1091   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1097 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1098 {
1099   PetscErrorCode ierr;
1100   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1101 
1102   PetscFunctionBegin;
1103   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1104   if (pcbddc->local_auxmat1) {
1105     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1106     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1107   }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1113 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1114 {
1115   PetscErrorCode ierr;
1116   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1117   PC_IS*            pcis = (PC_IS*)  (pc->data);
1118   const PetscScalar zero = 0.0;
1119 
1120   PetscFunctionBegin;
1121   /* Application of PHI^T (or PSI^T)  */
1122   if (pcbddc->coarse_psi_B) {
1123     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1124     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1125   } else {
1126     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1127     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1128   }
1129   /* Scatter data of coarse_rhs */
1130   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1131   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1132 
1133   /* Local solution on R nodes */
1134   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1135   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137   if (pcbddc->switch_static) {
1138     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1140   }
1141   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1142   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1143   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145   if (pcbddc->switch_static) {
1146     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1147     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148   }
1149 
1150   /* Coarse solution */
1151   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1152   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1153     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1154   }
1155   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157 
1158   /* Sum contributions from two levels */
1159   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1160   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1166 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1167 {
1168   PetscErrorCode ierr;
1169   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1170 
1171   PetscFunctionBegin;
1172   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1178 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1179 {
1180   PetscErrorCode ierr;
1181   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1182 
1183   PetscFunctionBegin;
1184   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 /* uncomment for testing purposes */
1189 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1190 #undef __FUNCT__
1191 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1192 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1193 {
1194   PetscErrorCode    ierr;
1195   PC_IS*            pcis = (PC_IS*)(pc->data);
1196   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1197   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1198   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1199   MatType           impMatType=MATSEQAIJ;
1200   /* one and zero */
1201   PetscScalar       one=1.0,zero=0.0;
1202   /* space to store constraints and their local indices */
1203   PetscScalar       *temp_quadrature_constraint;
1204   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1205   /* iterators */
1206   PetscInt          i,j,k,total_counts,temp_start_ptr;
1207   /* stuff to store connected components stored in pcbddc->mat_graph */
1208   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1209   PetscInt          n_ISForFaces,n_ISForEdges;
1210   /* near null space stuff */
1211   MatNullSpace      nearnullsp;
1212   const Vec         *nearnullvecs;
1213   Vec               *localnearnullsp;
1214   PetscBool         nnsp_has_cnst;
1215   PetscInt          nnsp_size;
1216   PetscScalar       *array;
1217   /* BLAS integers */
1218   PetscBLASInt      lwork,lierr;
1219   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1220   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1221   /* LAPACK working arrays for SVD or POD */
1222   PetscBool         skip_lapack;
1223   PetscScalar       *work;
1224   PetscReal         *singular_vals;
1225 #if defined(PETSC_USE_COMPLEX)
1226   PetscReal         *rwork;
1227 #endif
1228 #if defined(PETSC_MISSING_LAPACK_GESVD)
1229   PetscBLASInt      Blas_one_2=1;
1230   PetscScalar       *temp_basis,*correlation_mat;
1231 #else
1232   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1233   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1234 #endif
1235   /* change of basis */
1236   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1237   PetscBool         boolforchange,qr_needed;
1238   PetscBT           touched,change_basis,qr_needed_idx;
1239   /* auxiliary stuff */
1240   PetscInt          *nnz,*is_indices,*local_to_B;
1241   /* some quantities */
1242   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1243   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1244 
1245 
1246   PetscFunctionBegin;
1247   /* Get index sets for faces, edges and vertices from graph */
1248   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1249     pcbddc->use_vertices = PETSC_TRUE;
1250   }
1251   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1252   /* HACK: provide functions to set change of basis */
1253   if (!ISForVertices && pcbddc->NullSpace) {
1254     pcbddc->use_change_of_basis = PETSC_TRUE;
1255     pcbddc->use_change_on_faces = PETSC_FALSE;
1256   }
1257   /* print some info */
1258   if (pcbddc->dbg_flag) {
1259     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1260     i = 0;
1261     if (ISForVertices) {
1262       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1266     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1267     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1268   }
1269   /* check if near null space is attached to global mat */
1270   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1271   if (nearnullsp) {
1272     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1273   } else { /* if near null space is not provided BDDC uses constants by default */
1274     nnsp_size = 0;
1275     nnsp_has_cnst = PETSC_TRUE;
1276   }
1277   /* get max number of constraints on a single cc */
1278   max_constraints = nnsp_size;
1279   if (nnsp_has_cnst) max_constraints++;
1280 
1281   /*
1282        Evaluate maximum storage size needed by the procedure
1283        - temp_indices will contain start index of each constraint stored as follows
1284        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1285        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1286        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1287                                                                                                                                                          */
1288   total_counts = n_ISForFaces+n_ISForEdges;
1289   total_counts *= max_constraints;
1290   n_vertices = 0;
1291   if (ISForVertices) {
1292     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1293   }
1294   total_counts += n_vertices;
1295   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1296   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1297   total_counts = 0;
1298   max_size_of_constraint = 0;
1299   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1300     if (i<n_ISForEdges) {
1301       used_IS = &ISForEdges[i];
1302     } else {
1303       used_IS = &ISForFaces[i-n_ISForEdges];
1304     }
1305     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1306     total_counts += j;
1307     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1308   }
1309   total_counts *= max_constraints;
1310   total_counts += n_vertices;
1311   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1312   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1313   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1314   /* local to boundary numbering */
1315   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1316   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1317   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1318   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1319   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1320   /* get local part of global near null space vectors */
1321   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1322   for (k=0;k<nnsp_size;k++) {
1323     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1324     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1326   }
1327 
1328   /* whether or not to skip lapack calls */
1329   skip_lapack = PETSC_TRUE;
1330   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1331 
1332   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1333   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1334     PetscScalar temp_work;
1335 #if defined(PETSC_MISSING_LAPACK_GESVD)
1336     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1337     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1338     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1339     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1340 #if defined(PETSC_USE_COMPLEX)
1341     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1342 #endif
1343     /* now we evaluate the optimal workspace using query with lwork=-1 */
1344     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1345     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1346     lwork = -1;
1347     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1348 #if !defined(PETSC_USE_COMPLEX)
1349     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1350 #else
1351     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1352 #endif
1353     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1354     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1355 #else /* on missing GESVD */
1356     /* SVD */
1357     PetscInt max_n,min_n;
1358     max_n = max_size_of_constraint;
1359     min_n = max_constraints;
1360     if (max_size_of_constraint < max_constraints) {
1361       min_n = max_size_of_constraint;
1362       max_n = max_constraints;
1363     }
1364     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1365 #if defined(PETSC_USE_COMPLEX)
1366     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1367 #endif
1368     /* now we evaluate the optimal workspace using query with lwork=-1 */
1369     lwork = -1;
1370     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1371     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1372     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1373     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1374 #if !defined(PETSC_USE_COMPLEX)
1375     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1376 #else
1377     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1378 #endif
1379     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1380     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1381 #endif /* on missing GESVD */
1382     /* Allocate optimal workspace */
1383     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1384     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1385   }
1386   /* Now we can loop on constraining sets */
1387   total_counts = 0;
1388   temp_indices[0] = 0;
1389   /* vertices */
1390   if (ISForVertices) {
1391     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1392     if (nnsp_has_cnst) { /* consider all vertices */
1393       for (i=0;i<n_vertices;i++) {
1394         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1395         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1396         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1397         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1398         total_counts++;
1399       }
1400     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1401       PetscBool used_vertex;
1402       for (i=0;i<n_vertices;i++) {
1403         used_vertex = PETSC_FALSE;
1404         k = 0;
1405         while (!used_vertex && k<nnsp_size) {
1406           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1407           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1408             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1409             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1410             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1411             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1412             total_counts++;
1413             used_vertex = PETSC_TRUE;
1414           }
1415           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1416           k++;
1417         }
1418       }
1419     }
1420     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1421     n_vertices = total_counts;
1422   }
1423 
1424   /* edges and faces */
1425   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1426     if (i<n_ISForEdges) {
1427       used_IS = &ISForEdges[i];
1428       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1429     } else {
1430       used_IS = &ISForFaces[i-n_ISForEdges];
1431       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1432     }
1433     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1434     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1435     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1436     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1437     /* change of basis should not be performed on local periodic nodes */
1438     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1439     if (nnsp_has_cnst) {
1440       PetscScalar quad_value;
1441       temp_constraints++;
1442       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1443       for (j=0;j<size_of_constraint;j++) {
1444         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1445         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1446         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1447       }
1448       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1449       total_counts++;
1450     }
1451     for (k=0;k<nnsp_size;k++) {
1452       PetscReal real_value;
1453       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1454       for (j=0;j<size_of_constraint;j++) {
1455         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1456         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1457         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1458       }
1459       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1460       /* check if array is null on the connected component */
1461       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1462       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1463       if (real_value > 0.0) { /* keep indices and values */
1464         temp_constraints++;
1465         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1466         total_counts++;
1467       }
1468     }
1469     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1470     valid_constraints = temp_constraints;
1471     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1472     if (!pcbddc->use_nnsp_true && temp_constraints) {
1473       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1474 
1475 #if defined(PETSC_MISSING_LAPACK_GESVD)
1476       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1477          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1478          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1479             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1480             from that computed using LAPACKgesvd
1481          -> This is due to a different computation of eigenvectors in LAPACKheev
1482          -> The quality of the POD-computed basis will be the same */
1483       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1484       /* Store upper triangular part of correlation matrix */
1485       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1486       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1487       for (j=0;j<temp_constraints;j++) {
1488         for (k=0;k<j+1;k++) {
1489           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1490         }
1491       }
1492       /* compute eigenvalues and eigenvectors of correlation matrix */
1493       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1494       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1495 #if !defined(PETSC_USE_COMPLEX)
1496       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1497 #else
1498       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1499 #endif
1500       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1501       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1502       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1503       j=0;
1504       while (j < temp_constraints && singular_vals[j] < tol) j++;
1505       total_counts=total_counts-j;
1506       valid_constraints = temp_constraints-j;
1507       /* scale and copy POD basis into used quadrature memory */
1508       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1509       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1510       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1511       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1512       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1513       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1514       if (j<temp_constraints) {
1515         PetscInt ii;
1516         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1517         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1518         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1519         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1520         for (k=0;k<temp_constraints-j;k++) {
1521           for (ii=0;ii<size_of_constraint;ii++) {
1522             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1523           }
1524         }
1525       }
1526 #else  /* on missing GESVD */
1527       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1528       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1529       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1530       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1531 #if !defined(PETSC_USE_COMPLEX)
1532       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1533 #else
1534       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1535 #endif
1536       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1537       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1538       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1539       k = temp_constraints;
1540       if (k > size_of_constraint) k = size_of_constraint;
1541       j = 0;
1542       while (j < k && singular_vals[k-j-1] < tol) j++;
1543       total_counts = total_counts-temp_constraints+k-j;
1544       valid_constraints = k-j;
1545 #endif /* on missing GESVD */
1546     }
1547     /* setting change_of_basis flag is safe now */
1548     if (boolforchange) {
1549       for (j=0;j<valid_constraints;j++) {
1550         PetscBTSet(change_basis,total_counts-j-1);
1551       }
1552     }
1553   }
1554   /* free index sets of faces, edges and vertices */
1555   for (i=0;i<n_ISForFaces;i++) {
1556     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1557   }
1558   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1559   for (i=0;i<n_ISForEdges;i++) {
1560     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1561   }
1562   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1563   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1564 
1565   /* free workspace */
1566   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1567     ierr = PetscFree(work);CHKERRQ(ierr);
1568 #if defined(PETSC_USE_COMPLEX)
1569     ierr = PetscFree(rwork);CHKERRQ(ierr);
1570 #endif
1571     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1572 #if defined(PETSC_MISSING_LAPACK_GESVD)
1573     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1574     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1575 #endif
1576   }
1577   for (k=0;k<nnsp_size;k++) {
1578     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1579   }
1580   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1581 
1582   /* set quantities in pcbddc data structure */
1583   /* n_vertices defines the number of subdomain corners in the primal space */
1584   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1585   pcbddc->local_primal_size = total_counts;
1586   pcbddc->n_vertices = n_vertices;
1587   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1588 
1589   /* Create constraint matrix */
1590   /* The constraint matrix is used to compute the l2g map of primal dofs */
1591   /* so we need to set it up properly either with or without change of basis */
1592   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1593   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1594   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1595   /* array to compute a local numbering of constraints : vertices first then constraints */
1596   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1597   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1598   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1599   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1600   /* auxiliary stuff for basis change */
1601   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1602   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1603 
1604   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1605   total_primal_vertices=0;
1606   for (i=0;i<pcbddc->local_primal_size;i++) {
1607     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1608     if (size_of_constraint == 1) {
1609       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1610       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1611       aux_primal_minloc[total_primal_vertices]=0;
1612       total_primal_vertices++;
1613     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1614       PetscInt min_loc,min_index;
1615       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1616       /* find first untouched local node */
1617       k = 0;
1618       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1619       min_index = global_indices[k];
1620       min_loc = k;
1621       /* search the minimum among global nodes already untouched on the cc */
1622       for (k=1;k<size_of_constraint;k++) {
1623         /* there can be more than one constraint on a single connected component */
1624         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1625           min_index = global_indices[k];
1626           min_loc = k;
1627         }
1628       }
1629       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1630       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1631       aux_primal_minloc[total_primal_vertices]=min_loc;
1632       total_primal_vertices++;
1633     }
1634   }
1635   /* determine if a QR strategy is needed for change of basis */
1636   qr_needed = PETSC_FALSE;
1637   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1638   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1639     if (PetscBTLookup(change_basis,i)) {
1640       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1641       j = 0;
1642       for (k=0;k<size_of_constraint;k++) {
1643         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1644           j++;
1645         }
1646       }
1647       /* found more than one primal dof on the cc */
1648       if (j > 1) {
1649         PetscBTSet(qr_needed_idx,i);
1650         qr_needed = PETSC_TRUE;
1651       }
1652     }
1653   }
1654   /* free workspace */
1655   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1656 
1657   /* permute indices in order to have a sorted set of vertices */
1658   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1659 
1660   /* nonzero structure of constraint matrix */
1661   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1662   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1663   j=total_primal_vertices;
1664   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1665     if (!PetscBTLookup(change_basis,i)) {
1666       nnz[j]=temp_indices[i+1]-temp_indices[i];
1667       j++;
1668     }
1669   }
1670   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1671   ierr = PetscFree(nnz);CHKERRQ(ierr);
1672   /* set values in constraint matrix */
1673   for (i=0;i<total_primal_vertices;i++) {
1674     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1675   }
1676   total_counts = total_primal_vertices;
1677   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1678     if (!PetscBTLookup(change_basis,i)) {
1679       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1680       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1681       total_counts++;
1682     }
1683   }
1684   /* assembling */
1685   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687   /*
1688   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1689   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1690   */
1691   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1692   if (pcbddc->use_change_of_basis) {
1693     /* dual and primal dofs on a single cc */
1694     PetscInt     dual_dofs,primal_dofs;
1695     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1696     PetscInt     primal_counter;
1697     /* working stuff for GEQRF */
1698     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1699     PetscBLASInt lqr_work;
1700     /* working stuff for UNGQR */
1701     PetscScalar  *gqr_work,lgqr_work_t;
1702     PetscBLASInt lgqr_work;
1703     /* working stuff for TRTRS */
1704     PetscScalar  *trs_rhs;
1705     PetscBLASInt Blas_NRHS;
1706     /* pointers for values insertion into change of basis matrix */
1707     PetscInt     *start_rows,*start_cols;
1708     PetscScalar  *start_vals;
1709     /* working stuff for values insertion */
1710     PetscBT      is_primal;
1711 
1712     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1713     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1714     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1715     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1716     /* work arrays */
1717     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1718     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1719     /* nonzeros per row */
1720     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1721       if (PetscBTLookup(change_basis,i)) {
1722         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1723         if (PetscBTLookup(qr_needed_idx,i)) {
1724           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1725         } else {
1726           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1727           /* get local primal index on the cc */
1728           j = 0;
1729           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1730           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1731         }
1732       }
1733     }
1734     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1735     ierr = PetscFree(nnz);CHKERRQ(ierr);
1736     /* Set initial identity in the matrix */
1737     for (i=0;i<pcis->n_B;i++) {
1738       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1739     }
1740 
1741     if (pcbddc->dbg_flag) {
1742       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1743       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1744     }
1745 
1746 
1747     /* Now we loop on the constraints which need a change of basis */
1748     /*
1749        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1750        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1751 
1752        Basic blocks of change of basis matrix T computed
1753 
1754           - Using the following block transformation if there is only a primal dof on the cc
1755             (in the example, primal dof is the last one of the edge in LOCAL ordering
1756              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1757             | 1        0   ...        0     1 |
1758             | 0        1   ...        0     1 |
1759             |              ...                |
1760             | 0        ...            1     1 |
1761             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1762 
1763           - via QR decomposition of constraints otherwise
1764     */
1765     if (qr_needed) {
1766       /* space to store Q */
1767       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1768       /* first we issue queries for optimal work */
1769       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1770       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1771       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1772       lqr_work = -1;
1773       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1774       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1775       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1776       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1777       lgqr_work = -1;
1778       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1779       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1780       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1781       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1782       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1783       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1784       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1785       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1786       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1787       /* array to store scaling factors for reflectors */
1788       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1789       /* array to store rhs and solution of triangular solver */
1790       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1791       /* allocating workspace for check */
1792       if (pcbddc->dbg_flag) {
1793         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1794       }
1795     }
1796     /* array to store whether a node is primal or not */
1797     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1798     for (i=0;i<total_primal_vertices;i++) {
1799       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1800     }
1801 
1802     /* loop on constraints and see whether or not they need a change of basis and compute it */
1803     /* -> using implicit ordering contained in temp_indices data */
1804     total_counts = pcbddc->n_vertices;
1805     primal_counter = total_counts;
1806     while (total_counts<pcbddc->local_primal_size) {
1807       primal_dofs = 1;
1808       if (PetscBTLookup(change_basis,total_counts)) {
1809         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1810         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1811           primal_dofs++;
1812         }
1813         /* get constraint info */
1814         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1815         dual_dofs = size_of_constraint-primal_dofs;
1816 
1817         if (pcbddc->dbg_flag) {
1818           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1819         }
1820 
1821         if (primal_dofs > 1) { /* QR */
1822 
1823           /* copy quadrature constraints for change of basis check */
1824           if (pcbddc->dbg_flag) {
1825             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1826           }
1827           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1828           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1829 
1830           /* compute QR decomposition of constraints */
1831           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1832           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1833           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1834           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1835           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1836           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1837           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1838 
1839           /* explictly compute R^-T */
1840           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1841           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1842           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1843           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1844           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1845           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1846           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1847           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1848           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1849           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1850 
1851           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1852           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1853           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1854           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1855           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1856           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1857           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1858           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1859           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1860 
1861           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1862              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1863              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1864           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1865           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1866           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1868           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1869           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1870           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1871           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1872           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1873           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1874 
1875           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1876           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1877           /* insert cols for primal dofs */
1878           for (j=0;j<primal_dofs;j++) {
1879             start_vals = &qr_basis[j*size_of_constraint];
1880             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1881             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1882           }
1883           /* insert cols for dual dofs */
1884           for (j=0,k=0;j<dual_dofs;k++) {
1885             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1886               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1887               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1888               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1889               j++;
1890             }
1891           }
1892 
1893           /* check change of basis */
1894           if (pcbddc->dbg_flag) {
1895             PetscInt   ii,jj;
1896             PetscBool valid_qr=PETSC_TRUE;
1897             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1898             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1899             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1900             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1901             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1902             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1903             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1904             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1905             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1906             for (jj=0;jj<size_of_constraint;jj++) {
1907               for (ii=0;ii<primal_dofs;ii++) {
1908                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1909                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1910               }
1911             }
1912             if (!valid_qr) {
1913               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1914               for (jj=0;jj<size_of_constraint;jj++) {
1915                 for (ii=0;ii<primal_dofs;ii++) {
1916                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1917                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1918                   }
1919                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1920                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1921                   }
1922                 }
1923               }
1924             } else {
1925               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1926             }
1927           }
1928         } else { /* simple transformation block */
1929           PetscInt row,col;
1930           PetscScalar val;
1931           for (j=0;j<size_of_constraint;j++) {
1932             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1933             if (!PetscBTLookup(is_primal,row)) {
1934               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1935               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1936               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1937             } else {
1938               for (k=0;k<size_of_constraint;k++) {
1939                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1940                 if (row != col) {
1941                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1942                 } else {
1943                   val = 1.0;
1944                 }
1945                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1946               }
1947             }
1948           }
1949         }
1950         /* increment primal counter */
1951         primal_counter += primal_dofs;
1952       } else {
1953         if (pcbddc->dbg_flag) {
1954           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1955         }
1956       }
1957       /* increment constraint counter total_counts */
1958       total_counts += primal_dofs;
1959     }
1960 
1961     /* free workspace */
1962     if (qr_needed) {
1963       if (pcbddc->dbg_flag) {
1964         ierr = PetscFree(work);CHKERRQ(ierr);
1965       }
1966       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1967       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1968       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1969       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1970       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1971     }
1972     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1973     /* assembling */
1974     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1975     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1976     /*
1977     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1978     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1979     */
1980   }
1981 
1982   /* flush dbg viewer */
1983   if (pcbddc->dbg_flag) {
1984     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1985   }
1986 
1987   /* free workspace */
1988   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1989   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1990   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1991   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1992   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1993   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1994   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1995   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1996   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1997   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2003 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2004 {
2005   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2006   PC_IS       *pcis = (PC_IS*)pc->data;
2007   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2008   PetscInt    bs,ierr,i,vertex_size;
2009   PetscViewer viewer=pcbddc->dbg_viewer;
2010 
2011   PetscFunctionBegin;
2012   /* Init local Graph struct */
2013   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2014 
2015   /* Check validity of the csr graph passed in by the user */
2016   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2017     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2018   }
2019 
2020   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2021   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2022     Mat mat_adj;
2023     const PetscInt *xadj,*adjncy;
2024     PetscBool flg_row=PETSC_TRUE;
2025 
2026     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2027     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2028     if (!flg_row) {
2029       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2030     }
2031     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2032     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2033     if (!flg_row) {
2034       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2035     }
2036     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2037   }
2038 
2039   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2040   vertex_size = 1;
2041   if (!pcbddc->user_provided_isfordofs) {
2042     if (!pcbddc->n_ISForDofs) {
2043       IS *custom_ISForDofs;
2044 
2045       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2046       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2047       for (i=0;i<bs;i++) {
2048         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2049       }
2050       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2051       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2052       /* remove my references to IS objects */
2053       for (i=0;i<bs;i++) {
2054         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2055       }
2056       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2057     }
2058   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2059     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2060   }
2061 
2062   /* Setup of Graph */
2063   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2064     PetscInt    i,lsize,*idxs;
2065     PetscScalar *vals;
2066 
2067     /* get dirichlet indices in local ordering exploiting local to global map */
2068     ierr = ISGetLocalSize(pcbddc->DirichletBoundaries,&lsize);CHKERRQ(ierr);
2069     ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2070     for (i=0;i<lsize;i++) vals[i] = 1.0;
2071     ierr = ISGetIndices(pcbddc->DirichletBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr);
2072     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2073     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
2074     ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
2075     ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr);
2076     ierr = PetscFree(vals);CHKERRQ(ierr);
2077     ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
2078     /* now compute dirichlet set in local ordering */
2079     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2080     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2081     ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
2082     for (i=0,lsize=0;i<pcis->n;i++) {
2083       if (vals[i] == 1.0) {
2084         lsize++;
2085       }
2086     }
2087     ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
2088     for (i=0,lsize=0;i<pcis->n;i++) {
2089       if (vals[i] == 1.0) {
2090         idxs[lsize++] = i;
2091       }
2092     }
2093     ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
2094     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2095   }
2096   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2097     PetscInt    i,lsize,*idxs;
2098     PetscScalar *vals;
2099 
2100     /* get dirichlet indices in local ordering exploiting local to global map */
2101     ierr = ISGetLocalSize(pcbddc->NeumannBoundaries,&lsize);CHKERRQ(ierr);
2102     ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2103     for (i=0;i<lsize;i++) vals[i] = 1.0;
2104     ierr = ISGetIndices(pcbddc->NeumannBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr);
2105     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2106     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
2107     ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
2108     ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr);
2109     ierr = PetscFree(vals);CHKERRQ(ierr);
2110     ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
2111     /* now compute dirichlet set in local ordering */
2112     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2114     ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
2115     for (i=0,lsize=0;i<pcis->n;i++) {
2116       if (vals[i] == 1.0) {
2117         lsize++;
2118       }
2119     }
2120     ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
2121     for (i=0,lsize=0;i<pcis->n;i++) {
2122       if (vals[i] == 1.0) {
2123         idxs[lsize++] = i;
2124       }
2125     }
2126     ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
2127     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2128   }
2129   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2130 
2131   /* Graph's connected components analysis */
2132   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2133 
2134   /* print some info to stdout */
2135   if (pcbddc->dbg_flag) {
2136     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2137   }
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2143 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2144 {
2145   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2146   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   n = 0;
2151   vertices = 0;
2152   if (pcbddc->ConstraintMatrix) {
2153     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2154     for (i=0;i<local_primal_size;i++) {
2155       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2156       if (size_of_constraint == 1) n++;
2157       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2158     }
2159     if (vertices_idx) {
2160       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2161       n = 0;
2162       for (i=0;i<local_primal_size;i++) {
2163         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2164         if (size_of_constraint == 1) {
2165           vertices[n++]=row_cmat_indices[0];
2166         }
2167         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2168       }
2169     }
2170   }
2171   *n_vertices = n;
2172   if (vertices_idx) *vertices_idx = vertices;
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 #undef __FUNCT__
2177 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2178 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2179 {
2180   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2181   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2182   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2183   PetscBT        touched;
2184   PetscErrorCode ierr;
2185 
2186     /* This function assumes that the number of local constraints per connected component
2187        is not greater than the number of nodes defined for the connected component
2188        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2189   PetscFunctionBegin;
2190   n = 0;
2191   constraints_index = 0;
2192   if (pcbddc->ConstraintMatrix) {
2193     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2194     max_size_of_constraint = 0;
2195     for (i=0;i<local_primal_size;i++) {
2196       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2197       if (size_of_constraint > 1) {
2198         n++;
2199       }
2200       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2201       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2202     }
2203     if (constraints_idx) {
2204       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2205       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2206       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2207       n = 0;
2208       for (i=0;i<local_primal_size;i++) {
2209         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2210         if (size_of_constraint > 1) {
2211           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2212           /* find first untouched local node */
2213           j = 0;
2214           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2215           min_index = row_cmat_global_indices[j];
2216           min_loc = j;
2217           /* search the minimum among nodes not yet touched on the connected component
2218              since there can be more than one constraint on a single cc */
2219           for (j=1;j<size_of_constraint;j++) {
2220             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2221               min_index = row_cmat_global_indices[j];
2222               min_loc = j;
2223             }
2224           }
2225           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2226           constraints_index[n++] = row_cmat_indices[min_loc];
2227         }
2228         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2229       }
2230       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2231       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2232     }
2233   }
2234   *n_constraints = n;
2235   if (constraints_idx) *constraints_idx = constraints_index;
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 /* the next two functions has been adapted from pcis.c */
2240 #undef __FUNCT__
2241 #define __FUNCT__ "PCBDDCApplySchur"
2242 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2243 {
2244   PetscErrorCode ierr;
2245   PC_IS          *pcis = (PC_IS*)(pc->data);
2246 
2247   PetscFunctionBegin;
2248   if (!vec2_B) { vec2_B = v; }
2249   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2250   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2251   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2252   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2253   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2259 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2260 {
2261   PetscErrorCode ierr;
2262   PC_IS          *pcis = (PC_IS*)(pc->data);
2263 
2264   PetscFunctionBegin;
2265   if (!vec2_B) { vec2_B = v; }
2266   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2267   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2268   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2269   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2270   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "PCBDDCSubsetNumbering"
2276 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2277 {
2278   Vec            local_vec,global_vec;
2279   IS             seqis,paris;
2280   VecScatter     scatter_ctx;
2281   PetscScalar    *array;
2282   PetscInt       *temp_global_dofs;
2283   PetscScalar    globalsum;
2284   PetscInt       i,j,s;
2285   PetscInt       nlocals,first_index,old_index,max_local;
2286   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2287   PetscMPIInt    *dof_sizes,*dof_displs;
2288   PetscBool      first_found;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   /* mpi buffers */
2293   MPI_Comm_size(comm,&size_prec_comm);
2294   MPI_Comm_rank(comm,&rank_prec_comm);
2295   j = ( !rank_prec_comm ? size_prec_comm : 0);
2296   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2297   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2298   /* get maximum size of subset */
2299   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2300   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2301   max_local = 0;
2302   if (n_local_dofs) {
2303     max_local = temp_global_dofs[0];
2304     for (i=1;i<n_local_dofs;i++) {
2305       if (max_local < temp_global_dofs[i] ) {
2306         max_local = temp_global_dofs[i];
2307       }
2308     }
2309   }
2310   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2311   max_global++;
2312   max_local = 0;
2313   if (n_local_dofs) {
2314     max_local = local_dofs[0];
2315     for (i=1;i<n_local_dofs;i++) {
2316       if (max_local < local_dofs[i] ) {
2317         max_local = local_dofs[i];
2318       }
2319     }
2320   }
2321   max_local++;
2322   /* allocate workspace */
2323   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2324   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2325   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2326   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2327   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2328   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2329   /* create scatter */
2330   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2331   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2332   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2333   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2334   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2335   /* init array */
2336   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2337   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2338   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2339   if (local_dofs_mult) {
2340     for (i=0;i<n_local_dofs;i++) {
2341       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2342     }
2343   } else {
2344     for (i=0;i<n_local_dofs;i++) {
2345       array[local_dofs[i]]=1.0;
2346     }
2347   }
2348   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2349   /* scatter into global vec and get total number of global dofs */
2350   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2351   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2352   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2353   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2354   /* Fill global_vec with cumulative function for global numbering */
2355   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2356   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2357   nlocals = 0;
2358   first_index = -1;
2359   first_found = PETSC_FALSE;
2360   for (i=0;i<s;i++) {
2361     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2362       first_found = PETSC_TRUE;
2363       first_index = i;
2364     }
2365     nlocals += (PetscInt)PetscRealPart(array[i]);
2366   }
2367   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2368   if (!rank_prec_comm) {
2369     dof_displs[0]=0;
2370     for (i=1;i<size_prec_comm;i++) {
2371       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2372     }
2373   }
2374   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2375   if (first_found) {
2376     array[first_index] += (PetscScalar)nlocals;
2377     old_index = first_index;
2378     for (i=first_index+1;i<s;i++) {
2379       if (PetscRealPart(array[i]) > 0.0) {
2380         array[i] += array[old_index];
2381         old_index = i;
2382       }
2383     }
2384   }
2385   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2386   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2387   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2388   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2389   /* get global ordering of local dofs */
2390   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2391   if (local_dofs_mult) {
2392     for (i=0;i<n_local_dofs;i++) {
2393       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2394     }
2395   } else {
2396     for (i=0;i<n_local_dofs;i++) {
2397       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2398     }
2399   }
2400   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2401   /* free workspace */
2402   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2403   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2404   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2405   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2406   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2407   /* return pointer to global ordering of local dofs */
2408   *global_numbering_subset = temp_global_dofs;
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2414 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2415 {
2416   PetscInt       i,j;
2417   PetscScalar    *alphas;
2418   PetscErrorCode ierr;
2419 
2420   PetscFunctionBegin;
2421   /* this implements stabilized Gram-Schmidt */
2422   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2423   for (i=0;i<n;i++) {
2424     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2425     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2426     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2427   }
2428   ierr = PetscFree(alphas);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatConvert_IS_AIJ"
2435 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2436 {
2437   Mat new_mat;
2438   Mat_IS *matis = (Mat_IS*)(mat->data);
2439   /* info on mat */
2440   PetscInt bs,rows,cols;
2441   PetscInt lrows,lcols;
2442   PetscInt local_rows,local_cols;
2443   PetscMPIInt nsubdomains;
2444   /* preallocation */
2445   Vec vec_dnz,vec_onz;
2446   PetscScalar *my_dnz,*my_onz,*array;
2447   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2448   PetscInt  index_row,index_col,owner;
2449   PetscInt  *local_indices,*global_indices;
2450   /* work */
2451   PetscInt i,j;
2452   PetscErrorCode ierr;
2453 
2454   PetscFunctionBegin;
2455   /* CHECKS */
2456   /* get info from mat */
2457   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2458   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2459   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2460   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2461 
2462   /* MAT_INITIAL_MATRIX */
2463   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2464   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2465   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2466   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2467   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2468   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2469 
2470   /*
2471     preallocation
2472   */
2473 
2474   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2475   /*
2476      Some vectors are needed to sum up properly on shared interface dofs.
2477      Note that preallocation is not exact, since it overestimates nonzeros
2478   */
2479 /*
2480   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2481   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2482   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2483   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2484 */
2485   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2486   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2487   /* All processes need to compute entire row ownership */
2488   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2489   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2490   for (i=0;i<nsubdomains;i++) {
2491     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2492       row_ownership[j]=i;
2493     }
2494   }
2495   /* map indices of local mat to global values */
2496   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2497   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2498   for (i=0;i<local_rows;i++) local_indices[i]=i;
2499   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2500   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2501 
2502   /*
2503      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2504      then, they will be summed up properly. This way, preallocation is always sufficient
2505   */
2506   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2507   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2508   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2509   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2510   for (i=0;i<local_rows;i++) {
2511     index_row = global_indices[i];
2512     for (j=i;j<local_rows;j++) {
2513       owner = row_ownership[index_row];
2514       index_col = global_indices[j];
2515       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2516         my_dnz[i] += 1.0;
2517       } else { /* offdiag block */
2518         my_onz[i] += 1.0;
2519       }
2520       /* same as before, interchanging rows and cols */
2521       if (i != j) {
2522         owner = row_ownership[index_col];
2523         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2524           my_dnz[j] += 1.0;
2525         } else {
2526           my_onz[j] += 1.0;
2527         }
2528       }
2529     }
2530   }
2531   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2532   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2533   if (local_rows) { /* multilevel guard */
2534     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2535     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2536   }
2537   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2538   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2539   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2540   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2541   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2542   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2543   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2544 
2545   /* set computed preallocation in dnz and onz */
2546   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2547   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2548   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2549   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2550   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2551   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2552   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2553   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2554 
2555   /* Resize preallocation if overestimated */
2556   for (i=0;i<lrows;i++) {
2557     dnz[i] = PetscMin(dnz[i],lcols);
2558     onz[i] = PetscMin(onz[i],cols-lcols);
2559   }
2560   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2561   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2562 
2563   /*
2564     Set values. Very Basic.
2565   */
2566   for (i=0;i<local_rows;i++) {
2567     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2568     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2569     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2570     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2571     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2572   }
2573   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2574   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2575   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2576 
2577   /* get back mat */
2578   *M = new_mat;
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatISSubassemble_Private"
2584 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2585 {
2586   Mat             subdomain_adj;
2587   IS              new_ranks,ranks_send_to;
2588   MatPartitioning partitioner;
2589   Mat_IS          *matis;
2590   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2591   PetscInt        prank;
2592   PetscMPIInt     size,rank,color;
2593   PetscInt        *xadj,*adjncy,*oldranks;
2594   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2595   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2596   PetscErrorCode  ierr;
2597   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2598   PetscSubcomm    subcomm;
2599 
2600   PetscFunctionBegin;
2601   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2602   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2603   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2604 
2605   /* Get info on mapping */
2606   matis = (Mat_IS*)(mat->data);
2607   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2608   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2609 
2610   /* build local CSR graph of subdomains' connectivity */
2611   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2612   xadj[0] = 0;
2613   xadj[1] = PetscMax(n_neighs-1,0);
2614   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2615   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2616 
2617   if (threshold) {
2618     PetscInt* count,min_threshold;
2619     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2620     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2621     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2622       for (j=0;j<n_shared[i];j++) {
2623         count[shared[i][j]] += 1;
2624       }
2625     }
2626     /* adapt threshold since we dont want to lose significant connections */
2627     min_threshold = n_neighs;
2628     for (i=1;i<n_neighs;i++) {
2629       for (j=0;j<n_shared[i];j++) {
2630         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2631       }
2632     }
2633     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2634     threshold = PetscMax(min_threshold+1,threshold);
2635     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2636     xadj[1] = 0;
2637     for (i=1;i<n_neighs;i++) {
2638       for (j=0;j<n_shared[i];j++) {
2639         if (count[shared[i][j]] < threshold) {
2640           adjncy[xadj[1]] = neighs[i];
2641           adjncy_wgt[xadj[1]] = n_shared[i];
2642           xadj[1]++;
2643           break;
2644         }
2645       }
2646     }
2647     ierr = PetscFree(count);CHKERRQ(ierr);
2648   } else {
2649     if (xadj[1]) {
2650       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2651       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2652     }
2653   }
2654   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2655   if (use_square) {
2656     for (i=0;i<xadj[1];i++) {
2657       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2658     }
2659   }
2660   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2661 
2662   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2663 
2664   /*
2665     Restrict work on active processes only.
2666   */
2667   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2668   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2669   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2670   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2671   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2672   if (color) {
2673     ierr = PetscFree(xadj);CHKERRQ(ierr);
2674     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2675     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2676   } else {
2677     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2678     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2679     prank = rank;
2680     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2681     for (i=0;i<size;i++) {
2682       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2683     }
2684     for (i=0;i<xadj[1];i++) {
2685       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2686     }
2687     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2688     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2689     n_subdomains = (PetscInt)size;
2690     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2691 
2692     /* Partition */
2693     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2694     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2695     if (use_vwgt) {
2696       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2697       v_wgt[0] = local_size;
2698       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2699     }
2700     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2701     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2702     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2703     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2704     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2705 
2706     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2707     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2708     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2709     /* clean up */
2710     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2711     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2712     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2713     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2714   }
2715   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2716 
2717   /* assemble parallel IS for sends */
2718   i = 1;
2719   if (color) i=0;
2720   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2721   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2722 
2723   /* get back IS */
2724   *is_sends = ranks_send_to;
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatISSubassemble"
2732 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2733 {
2734   Mat                    local_mat,new_mat;
2735   Mat_IS                 *matis;
2736   IS                     is_sends_internal;
2737   PetscInt               rows,cols;
2738   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2739   PetscBool              ismatis,isdense;
2740   ISLocalToGlobalMapping l2gmap;
2741   PetscInt*              l2gmap_indices;
2742   const PetscInt*        is_indices;
2743   MatType                new_local_type;
2744   MatTypePrivate         new_local_type_private;
2745   /* buffers */
2746   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2747   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2748   /* MPI */
2749   MPI_Comm               comm;
2750   PetscMPIInt            n_sends,n_recvs,commsize;
2751   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2752   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2753   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2754   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2755   PetscErrorCode         ierr;
2756 
2757   PetscFunctionBegin;
2758   /* checks */
2759   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2760   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2761   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2762   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2763   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2764   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2765   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2766   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2767   PetscValidLogicalCollectiveInt(mat,bs,0);
2768   /* prepare IS for sending if not provided */
2769   if (!is_sends) {
2770     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2771     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2772   } else {
2773     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2774     is_sends_internal = is_sends;
2775   }
2776 
2777   /* get pointer of MATIS data */
2778   matis = (Mat_IS*)mat->data;
2779 
2780   /* get comm */
2781   comm = PetscObjectComm((PetscObject)mat);
2782 
2783   /* compute number of sends */
2784   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2785   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2786 
2787   /* compute number of receives */
2788   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2789   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2790   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2791   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2792   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2793   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2794   ierr = PetscFree(iflags);CHKERRQ(ierr);
2795 
2796   /* prepare send/receive buffers */
2797   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2798   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2799   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2800   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2801 
2802   /* Get data from local mat */
2803   if (!isdense) {
2804     /* TODO: See below some guidelines on how to prepare the local buffers */
2805     /*
2806        send_buffer_vals should contain the raw values of the local matrix
2807        send_buffer_idxs should contain:
2808        - MatType_PRIVATE type
2809        - PetscInt        size_of_l2gmap
2810        - PetscInt        global_row_indices[size_of_l2gmap]
2811        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2812     */
2813   } else {
2814     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2815     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2816     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2817     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2818     send_buffer_idxs[1] = i;
2819     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2820     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2821     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2822     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2823     for (i=0;i<n_sends;i++) {
2824       ilengths_vals[is_indices[i]] = len*len;
2825       ilengths_idxs[is_indices[i]] = len+2;
2826     }
2827   }
2828   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2829   buf_size_idxs = 0;
2830   buf_size_vals = 0;
2831   for (i=0;i<n_recvs;i++) {
2832     buf_size_idxs += (PetscInt)olengths_idxs[i];
2833     buf_size_vals += (PetscInt)olengths_vals[i];
2834   }
2835   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2836   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2837 
2838   /* get new tags for clean communications */
2839   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2840   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2841 
2842   /* allocate for requests */
2843   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2844   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2845   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2846   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2847 
2848   /* communications */
2849   ptr_idxs = recv_buffer_idxs;
2850   ptr_vals = recv_buffer_vals;
2851   for (i=0;i<n_recvs;i++) {
2852     source_dest = onodes[i];
2853     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2854     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2855     ptr_idxs += olengths_idxs[i];
2856     ptr_vals += olengths_vals[i];
2857   }
2858   for (i=0;i<n_sends;i++) {
2859     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2860     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2861     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2862   }
2863   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2864   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2865 
2866   /* assemble new l2g map */
2867   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2868   ptr_idxs = recv_buffer_idxs;
2869   buf_size_idxs = 0;
2870   for (i=0;i<n_recvs;i++) {
2871     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2872     ptr_idxs += olengths_idxs[i];
2873   }
2874   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2875   ptr_idxs = recv_buffer_idxs;
2876   buf_size_idxs = 0;
2877   for (i=0;i<n_recvs;i++) {
2878     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2879     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2880     ptr_idxs += olengths_idxs[i];
2881   }
2882   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2883   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2884   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2885 
2886   /* infer new local matrix type from received local matrices type */
2887   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2888   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2889   new_local_type_private = MATAIJ_PRIVATE;
2890   new_local_type = MATSEQAIJ;
2891   if (n_recvs) {
2892     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2893     ptr_idxs = recv_buffer_idxs;
2894     for (i=0;i<n_recvs;i++) {
2895       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2896         new_local_type_private = MATAIJ_PRIVATE;
2897         break;
2898       }
2899       ptr_idxs += olengths_idxs[i];
2900     }
2901     switch (new_local_type_private) {
2902       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2903         new_local_type = MATSEQAIJ;
2904         bs = 1;
2905         break;
2906       case MATAIJ_PRIVATE:
2907         new_local_type = MATSEQAIJ;
2908         bs = 1;
2909         break;
2910       case MATBAIJ_PRIVATE:
2911         new_local_type = MATSEQBAIJ;
2912         break;
2913       case MATSBAIJ_PRIVATE:
2914         new_local_type = MATSEQSBAIJ;
2915         break;
2916       default:
2917         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2918         break;
2919     }
2920   }
2921 
2922   /* create MATIS object */
2923   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2924   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2925   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2926   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2927   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2928   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2929 
2930   /* set values */
2931   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2932   ptr_vals = recv_buffer_vals;
2933   ptr_idxs = recv_buffer_idxs;
2934   for (i=0;i<n_recvs;i++) {
2935     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2936       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2937       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2938       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2939       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2940       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2941     }
2942     ptr_idxs += olengths_idxs[i];
2943     ptr_vals += olengths_vals[i];
2944   }
2945   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2946   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2947   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2948   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2949 
2950   { /* check */
2951     Vec       lvec,rvec;
2952     PetscReal infty_error;
2953 
2954     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2955     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2956     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2957     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2958     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2959     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2960     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2961     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2962     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2963   }
2964 
2965   /* free workspace */
2966   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2967   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2968   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2969   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2970   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2971   if (isdense) {
2972     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2973     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2974   } else {
2975     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2976   }
2977   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2978   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2979   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2980   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2981   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2982   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2983   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2984   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2985   ierr = PetscFree(onodes);CHKERRQ(ierr);
2986   /* get back new mat */
2987   *mat_n = new_mat;
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 #undef __FUNCT__
2992 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2993 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2994 {
2995   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2996   PC_IS                  *pcis = (PC_IS*)pc->data;
2997   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2998   MatNullSpace           CoarseNullSpace=NULL;
2999   ISLocalToGlobalMapping coarse_islg;
3000   IS                     coarse_is;
3001   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
3002   PetscInt               im_active=-1,active_procs=-1;
3003   PC                     pc_temp;
3004   PCType                 coarse_pc_type;
3005   KSPType                coarse_ksp_type;
3006   PetscBool              multilevel_requested,multilevel_allowed;
3007   PetscBool              setsym,issym,isbddc,isnn;
3008   MatStructure           matstruct;
3009   PetscErrorCode         ierr;
3010 
3011   PetscFunctionBegin;
3012   /* Assign global numbering to coarse dofs */
3013   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3014 
3015   /* infer some info from user */
3016   issym = PETSC_FALSE;
3017   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3018   multilevel_allowed = PETSC_FALSE;
3019   multilevel_requested = PETSC_FALSE;
3020   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3021   if (multilevel_requested) {
3022     /* count "active processes" */
3023     im_active = !!(pcis->n);
3024     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3025     if (active_procs/pcbddc->coarsening_ratio < 2) {
3026       multilevel_allowed = PETSC_FALSE;
3027     } else {
3028       multilevel_allowed = PETSC_TRUE;
3029     }
3030   }
3031 
3032   /* set defaults for coarse KSP and PC */
3033   if (multilevel_allowed) {
3034     if (issym) {
3035       coarse_ksp_type = KSPRICHARDSON;
3036     } else {
3037       coarse_ksp_type = KSPCHEBYSHEV;
3038     }
3039     coarse_pc_type = PCBDDC;
3040   } else {
3041     coarse_ksp_type = KSPPREONLY;
3042     coarse_pc_type = PCREDUNDANT;
3043   }
3044 
3045   /* create the coarse KSP object only once with defaults */
3046   if (!pcbddc->coarse_ksp) {
3047     char prefix[256],str_level[3];
3048     size_t len;
3049     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3050     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3051     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3052     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3053     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3054     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3055     /* prefix */
3056     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3057     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3058     if (!pcbddc->current_level) {
3059       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3060       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3061     } else {
3062       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3063       if (pcbddc->current_level>1) len -= 2;
3064       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3065       *(prefix+len)='\0';
3066       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3067       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3068     }
3069     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3070   }
3071   /* allow user customization */
3072   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3073   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3074   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3075 
3076   /* get some info after set from options */
3077   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3078   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3079   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3080   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3081     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3082     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3083     isbddc = PETSC_FALSE;
3084   }
3085 
3086   /* propagate BDDC info to the next level */
3087   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3088   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3089   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3090 
3091   /* creates temporary MATIS object for coarse matrix */
3092   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3093   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3094   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3095   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3096   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3097   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3098   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3099   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3100   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3101   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3102 
3103   /* assemble coarse matrix */
3104   if (isbddc || isnn) {
3105     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3106   } else {
3107     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3108   }
3109   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3110 
3111   /* create local to global scatters for coarse problem */
3112   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3113   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3114   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3115 
3116   /* propagate symmetry info to coarse matrix */
3117   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3118 
3119   /* Compute coarse null space (special handling by BDDC only) */
3120   if (pcbddc->NullSpace) {
3121     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3122     if (isbddc) {
3123       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3124     } else {
3125       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3126     }
3127   }
3128 
3129   /* set operators */
3130   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3131   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3132 
3133   /* additional KSP customization */
3134   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3135   if (max_it < 5) {
3136     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3137   }
3138   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3139 
3140 
3141   /* print some info if requested */
3142   if (pcbddc->dbg_flag) {
3143     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3144     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3145     if (!multilevel_allowed) {
3146       if (multilevel_requested) {
3147         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3148       } else if (pcbddc->max_levels) {
3149         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3150       }
3151     }
3152     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3153     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3154   }
3155 
3156   /* setup coarse ksp */
3157   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3158   if (pcbddc->dbg_flag) {
3159     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3160     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3161     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3162     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3163   }
3164 
3165   /* Check coarse problem if requested */
3166   if (pcbddc->dbg_flag) {
3167     KSP       check_ksp;
3168     KSPType   check_ksp_type;
3169     PC        check_pc;
3170     Vec       check_vec;
3171     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3172     PetscInt  its;
3173     PetscBool ispreonly,compute;
3174 
3175     /* Create ksp object suitable for estimation of extreme eigenvalues */
3176     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3177     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3178     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3179     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3180     if (ispreonly) {
3181       check_ksp_type = KSPPREONLY;
3182       compute = PETSC_FALSE;
3183     } else {
3184       if (issym) check_ksp_type = KSPCG;
3185       else check_ksp_type = KSPGMRES;
3186       compute = PETSC_TRUE;
3187     }
3188     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3189     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3190     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3191     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3192     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3193     /* create random vec */
3194     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3195     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3196     if (CoarseNullSpace) {
3197       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3198     }
3199     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3200     /* solve coarse problem */
3201     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3202     if (CoarseNullSpace) {
3203       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3204     }
3205     /* check coarse problem residual error */
3206     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3207     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3208     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3209     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3210     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3211     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3212     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3213     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3214     /* get eigenvalue estimation if preonly has not been requested */
3215     if (!ispreonly) {
3216       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3217       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3218       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3219     }
3220     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3221     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3222   }
3223   /* free memory */
3224   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3225   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3231 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3232 {
3233   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3234   PC_IS*         pcis = (PC_IS*)pc->data;
3235   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3236   PetscInt       i,j,coarse_size;
3237   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3238   PetscErrorCode ierr;
3239 
3240   PetscFunctionBegin;
3241   /* get indices in local ordering for vertices and constraints */
3242   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3243   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3244   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3245   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3246   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3247   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3248   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3249 
3250   /* Compute global number of coarse dofs */
3251   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3252 
3253   /* check numbering */
3254   if (pcbddc->dbg_flag) {
3255     PetscScalar coarsesum,*array;
3256 
3257     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3258     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3259     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3260     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3261     for (i=0;i<pcbddc->local_primal_size;i++) {
3262       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3263     }
3264     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3265     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3266     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3267     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3268     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3269     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3270     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3271     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3272     for (i=0;i<pcis->n;i++) {
3273       if (array[i] == 1.0) {
3274         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3275       }
3276     }
3277     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3278     for (i=0;i<pcis->n;i++) {
3279       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3280     }
3281     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3282     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3283     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3284     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3285     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3286     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3287     if (pcbddc->dbg_flag > 1) {
3288       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3289       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3290       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3291       for (i=0;i<pcbddc->local_primal_size;i++) {
3292         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3293       }
3294       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3295     }
3296     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3297   }
3298   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3299   /* get back data */
3300   *coarse_size_n = coarse_size;
3301   *local_primal_indices_n = local_primal_indices;
3302   PetscFunctionReturn(0);
3303 }
3304 
3305