xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision a58cbe93922222ef26adc1afb5b209786e2d7047)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
166   /* Vector and matrix types */
167   VecType                impVecType;
168   MatType                impMatType;
169   /* some shortcuts to scalars */
170   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
171   /* for debugging purposes */
172   PetscReal              *coarsefunctions_errors,*constraints_errors;
173 
174   PetscFunctionBegin;
175   /* get number of vertices and their local indices */
176   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
177   n_constraints = pcbddc->local_primal_size-n_vertices;
178   /* Set Non-overlapping dimensions */
179   n_B = pcis->n_B; n_D = pcis->n - n_B;
180   n_R = pcis->n-n_vertices;
181 
182   /* Set types for local objects needed by BDDC precondtioner */
183   impMatType = MATSEQDENSE;
184   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
185 
186   /* Allocating some extra storage just to be safe */
187   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
188   for (i=0;i<pcis->n;i++) auxindices[i]=i;
189 
190   /* vertices in boundary numbering */
191   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
193   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
194   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
195   if (i != n_vertices) {
196     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
197   }
198 
199   /* some work vectors on vertices and/or constraints */
200   if (n_vertices) {
201     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
202     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
203     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
204     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
205   }
206   if (n_constraints) {
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
208     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
209   }
210 
211   /* Precompute stuffs needed for preprocessing and application of BDDC*/
212   if (n_constraints) {
213     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
214     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
215     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
216     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
217 
218     /* Extract constraints on R nodes: C_{CR}  */
219     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
220     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
221     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
222 
223     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
224     for (i=0;i<n_constraints;i++) {
225       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
226       /* Get row of constraint matrix in R numbering */
227       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
228       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
229       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
230       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
231       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
232       /* Solve for row of constraint matrix in R numbering */
233       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
234       /* Set values in local_auxmat2 */
235       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
236       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
237       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
238     }
239     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
241     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
242 
243     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
244     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
245     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
246     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
247     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
248     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
249     ierr = MatSetUp(M1);CHKERRQ(ierr);
250     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
251     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
252     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
253     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
254     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
255     ierr = MatDestroy(&M2);CHKERRQ(ierr);
256     ierr = MatDestroy(&M3);CHKERRQ(ierr);
257     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
258     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
259   }
260 
261   /* Get submatrices from subdomain matrix */
262   if (n_vertices) {
263     PetscInt ibs,mbs;
264     PetscBool issbaij;
265     Mat newmat;
266 
267     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
268     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
269     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
270     if (ibs != mbs) { /* need to convert to SEQAIJ */
271       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
273       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
274       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
275       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
276     } else {
277       /* this is safe */
278       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
279       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
280       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
281         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
282         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
283         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
284         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
285         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
286         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
287       } else {
288         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
289         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
290       }
291     }
292     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
293   }
294 
295   /* Matrix of coarse basis functions (local) */
296   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
297   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
298   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
299   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
300   if (pcbddc->switch_static || pcbddc->dbg_flag) {
301     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
302     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
303     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
304     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
305   }
306 
307   if (pcbddc->dbg_flag) {
308     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
309     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
310     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
311   }
312   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
313   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
314 
315   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
316 
317   /* vertices */
318   for (i=0;i<n_vertices;i++) {
319     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
320     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
321     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
322     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
323     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
324     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
325     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
326     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
327     if (n_constraints) {
328       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
329       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
330       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
331     }
332     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
333     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
334 
335     /* Set values in coarse basis function and subdomain part of coarse_mat */
336     /* coarse basis functions */
337     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
338     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
339     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
340     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
341     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
342     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
343     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
344     if (pcbddc->switch_static || pcbddc->dbg_flag) {
345       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
346       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
348       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
350     }
351     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
352     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
353     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
354     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
355     if (n_constraints) {
356       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
357       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
358       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
359     }
360 
361     /* check */
362     if (pcbddc->dbg_flag) {
363       /* assemble subdomain vector on local nodes */
364       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
365       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
366       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
367       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
368       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
369       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
370       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
371       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
372       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
373       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
374       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
375       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
376       if (n_constraints) {
377         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
378         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
379         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
380       }
381       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
382       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
383       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
384       /* check saddle point solution */
385       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
386       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
387       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
388       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
389       /* shift by the identity matrix */
390       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
391       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
392       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
393       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
394     }
395   }
396 
397   /* constraints */
398   for (i=0;i<n_constraints;i++) {
399     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
400     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
401     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
402     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
403     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
404     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
405     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
406     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
407     if (n_vertices) {
408       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
409     }
410     /* Set values in coarse basis function and subdomain part of coarse_mat */
411     /* coarse basis functions */
412     j = i+n_vertices; /* don't touch this! */
413     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
414     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
415     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
417     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
418     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
419     if (pcbddc->switch_static || pcbddc->dbg_flag) {
420       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
421       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
422       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
423       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
425     }
426     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
427     if (n_vertices) {
428       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
429       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
430       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
431     }
432     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
433     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
434     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
435 
436     if (pcbddc->dbg_flag) {
437       /* assemble subdomain vector on nodes */
438       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
439       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
440       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
441       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
442       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
443       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
444       /* assemble subdomain vector of lagrange multipliers */
445       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
446       if (n_vertices) {
447         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
448         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
449         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
450       }
451       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
452       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
453       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
454       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
455       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
456       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
457       /* check saddle point solution */
458       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
459       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
460       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
461       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
462       /* shift by the identity matrix */
463       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
464       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
465       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
466       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
467     }
468   }
469   /* call assembling routines for local coarse basis */
470   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
471   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
472   if (pcbddc->switch_static || pcbddc->dbg_flag) {
473     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
474     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
475   }
476 
477   /* compute other basis functions for non-symmetric problems */
478   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
479   if (!setsym || (setsym && !issym)) {
480     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
481     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
482     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
483     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
484     if (pcbddc->switch_static || pcbddc->dbg_flag) {
485       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
486       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
487       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
488       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
489     }
490     for (i=0;i<pcbddc->local_primal_size;i++) {
491       if (n_constraints) {
492         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
493         for (j=0;j<n_constraints;j++) {
494           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
495         }
496         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
497         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
498       }
499       if (i<n_vertices) {
500         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
501         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
502         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
503         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
504         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
505         if (n_constraints) {
506           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507         }
508       } else {
509         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
510       }
511       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
512       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
513       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
514       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
515       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
516       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
517       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
518       if (i<n_vertices) {
519         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
520       }
521       if (pcbddc->switch_static || pcbddc->dbg_flag) {
522         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
523         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
524         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
525         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
526         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
527       }
528 
529       if (pcbddc->dbg_flag) {
530         /* assemble subdomain vector on nodes */
531         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
532         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
533         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
534         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
535         if (i<n_vertices) {
536           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
537         }
538         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
539         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
540         /* assemble subdomain vector of lagrange multipliers */
541         for (j=0;j<pcbddc->local_primal_size;j++) {
542           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
543         }
544         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
545         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
546         /* check saddle point solution */
547         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
548         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
549         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
550         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
551         /* shift by the identity matrix */
552         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
553         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
554         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
555         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
556       }
557     }
558     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     if (pcbddc->switch_static || pcbddc->dbg_flag) {
561       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
562       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
563     }
564   }
565   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
566   /* Checking coarse_sub_mat and coarse basis functios */
567   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
568   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
569   if (pcbddc->dbg_flag) {
570     Mat         coarse_sub_mat;
571     Mat         AUXMAT,TM1,TM2,TM3,TM4;
572     Mat         coarse_phi_D,coarse_phi_B;
573     Mat         coarse_psi_D,coarse_psi_B;
574     Mat         A_II,A_BB,A_IB,A_BI;
575     MatType     checkmattype=MATSEQAIJ;
576     PetscReal   real_value;
577 
578     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
579     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
580     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
581     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
582     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
583     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
584     if (pcbddc->coarse_psi_B) {
585       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
586       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
587     }
588     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
589 
590     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
591     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
592     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
593     if (pcbddc->coarse_psi_B) {
594       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
595       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
596       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
597       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
598       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
599       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
600       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
601       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
602       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
603       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
604       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
605       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
606     } else {
607       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
608       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
609       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
610       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
611       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
612       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
613       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
614       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
615     }
616     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
617     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
618     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
619     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
620     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
621     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
622     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
623     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
624     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
625     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
626     for (i=0;i<pcbddc->local_primal_size;i++) {
627       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
628     }
629     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
630     for (i=0;i<pcbddc->local_primal_size;i++) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
632     }
633     if (pcbddc->coarse_psi_B) {
634       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
635       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
636         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
637       }
638       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
639       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
640         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
641       }
642     }
643     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
644     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
645     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
646     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
647     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
648     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
649     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
650     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
651     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
652     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
653     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
654     if (pcbddc->coarse_psi_B) {
655       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
656       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
657     }
658     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
659     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
660     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
661     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
662   }
663   /* free memory */
664   if (n_vertices) {
665     ierr = PetscFree(vertices);CHKERRQ(ierr);
666     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
667     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
668     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
669     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
670     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
671   }
672   if (n_constraints) {
673     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
674     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
675     ierr = MatDestroy(&M1);CHKERRQ(ierr);
676     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
677   }
678   ierr = PetscFree(auxindices);CHKERRQ(ierr);
679   /* get back data */
680   *coarse_submat_vals_n = coarse_submat_vals;
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
686 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
687 {
688   PC_IS*            pcis = (PC_IS*)(pc->data);
689   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
690   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
691   PetscBool         issbaij,isbaij;
692   /* manage repeated solves */
693   MatReuse          reuse;
694   MatStructure      matstruct;
695   PetscErrorCode    ierr;
696 
697   PetscFunctionBegin;
698   /* get mat flags */
699   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
700   reuse = MAT_INITIAL_MATRIX;
701   if (pc->setupcalled) {
702     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
703     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
704     if (matstruct == SAME_NONZERO_PATTERN) {
705       reuse = MAT_REUSE_MATRIX;
706     } else {
707       reuse = MAT_INITIAL_MATRIX;
708     }
709   }
710   if (reuse == MAT_INITIAL_MATRIX) {
711     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
712     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
713     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
714     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
715     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
716   }
717 
718   /* transform local matrices if needed */
719   if (pcbddc->use_change_of_basis) {
720     Mat         change_mat_all;
721     PetscScalar *row_cmat_values;
722     PetscInt    *row_cmat_indices;
723     PetscInt    *nnz,*is_indices,*temp_indices;
724     PetscInt    i,j,k,n_D,n_B;
725 
726     /* Get Non-overlapping dimensions */
727     n_B = pcis->n_B;
728     n_D = pcis->n-n_B;
729 
730     /* compute nonzero structure of change of basis on all local nodes */
731     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
732     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
733     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
734     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
735     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
736     k=1;
737     for (i=0;i<n_B;i++) {
738       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
739       nnz[is_indices[i]]=j;
740       if (k < j) k = j;
741       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
742     }
743     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
744     /* assemble change of basis matrix on the whole set of local dofs */
745     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
746     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
747     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
748     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
749     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
750     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
751     for (i=0;i<n_D;i++) {
752       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
753     }
754     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
755     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
756     for (i=0;i<n_B;i++) {
757       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
758       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
759       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
760       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
761     }
762     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
763     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
764     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
765     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
766     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
767     if (!issbaij && !isbaij) {
768       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
769     } else {
770       Mat work_mat;
771       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
772       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
773       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
774     }
775     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
776     ierr = PetscFree(nnz);CHKERRQ(ierr);
777     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
778   } else {
779     /* without change of basis, the local matrix is unchanged */
780     if (!pcbddc->local_mat) {
781       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
782       pcbddc->local_mat = matis->A;
783     }
784   }
785 
786   /* get submatrices */
787   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
788   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
789   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
790   if (!issbaij) {
791     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
792     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
793   } else {
794     Mat newmat;
795     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
796     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
797     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
798     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
805 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
806 {
807   PC_IS*         pcis = (PC_IS*)(pc->data);
808   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
809   IS             is_aux1,is_aux2;
810   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
811   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
812   PetscInt       vbs,bs;
813   PetscBT        bitmask;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   /* Set Non-overlapping dimensions */
818   n_B = pcis->n_B; n_D = pcis->n - n_B;
819   /* get vertex indices from constraint matrix */
820   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
821 
822   /* create auxiliary bitmask */
823   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
824   for (i=0;i<n_vertices;i++) {
825     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
826   }
827 
828   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
829   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
830   for (i=0, n_R=0; i<pcis->n; i++) {
831     if (!PetscBTLookup(bitmask,i)) {
832       idx_R_local[n_R] = i;
833       n_R++;
834     }
835   }
836 
837   /* Block code */
838   vbs = 1;
839   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
840   if (bs>1 && !(n_vertices%bs)) {
841     PetscBool is_blocked = PETSC_TRUE;
842     PetscInt  *vary;
843     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
844     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
845     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
846     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
847     for (i=0; i<n_vertices; i++) {
848       if (vary[i]!=0 && vary[i]!=bs) {
849         is_blocked = PETSC_FALSE;
850         break;
851       }
852     }
853     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
854       vbs = bs;
855       for (i=0;i<n_R/vbs;i++) {
856         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
857       }
858     }
859     ierr = PetscFree(vary);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(vertices);CHKERRQ(ierr);
862   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
863   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
864 
865   /* print some info if requested */
866   if (pcbddc->dbg_flag) {
867     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
868     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
869     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
870     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
871     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
872     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
873     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
874   }
875 
876   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
877   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
878   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
879   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
880   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
881   for (i=0; i<n_D; i++) {
882     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
883   }
884   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
885   for (i=0, j=0; i<n_R; i++) {
886     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
887       aux_array1[j++] = i;
888     }
889   }
890   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
891   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
892   for (i=0, j=0; i<n_B; i++) {
893     if (!PetscBTLookup(bitmask,is_indices[i])) {
894       aux_array2[j++] = i;
895     }
896   }
897   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
898   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
899   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
900   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
901   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
902 
903   if (pcbddc->switch_static || pcbddc->dbg_flag) {
904     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
905     for (i=0, j=0; i<n_R; i++) {
906       if (PetscBTLookup(bitmask,idx_R_local[i])) {
907         aux_array1[j++] = i;
908       }
909     }
910     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
911     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
912     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
913   }
914   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
915   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
922 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
923 {
924   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
925   PC_IS          *pcis = (PC_IS*)pc->data;
926   PC             pc_temp;
927   Mat            A_RR;
928   Vec            vec1,vec2,vec3;
929   MatStructure   matstruct;
930   PetscScalar    m_one = -1.0;
931   PetscReal      value;
932   PetscInt       n_D,n_R,ibs,mbs;
933   PetscBool      use_exact,use_exact_reduced,issbaij;
934   PetscErrorCode ierr;
935   /* prefixes stuff */
936   char           dir_prefix[256],neu_prefix[256],str_level[3];
937   size_t         len;
938 
939   PetscFunctionBegin;
940   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
941 
942   /* compute prefixes */
943   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
944   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
945   if (!pcbddc->current_level) {
946     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
947     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
948     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
949     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
950   } else {
951     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
952     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
953     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
954     len -= 15; /* remove "pc_bddc_coarse_" */
955     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
956     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
957     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
958     *(dir_prefix+len)='\0';
959     *(neu_prefix+len)='\0';
960     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
961     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
962     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
963     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
964   }
965 
966   /* DIRICHLET PROBLEM */
967   /* Matrix for Dirichlet problem is pcis->A_II */
968   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
969   if (!pcbddc->ksp_D) { /* create object if not yet build */
970     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
971     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
972     /* default */
973     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
974     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
975     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
976     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
977     if (issbaij) {
978       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
979     } else {
980       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
981     }
982     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
983   }
984   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
985   /* Allow user's customization */
986   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
987   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
988   if (!n_D) {
989     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
990     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
991   }
992   /* Set Up KSP for Dirichlet problem of BDDC */
993   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
994   /* set ksp_D into pcis data */
995   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
996   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
997   pcis->ksp_D = pcbddc->ksp_D;
998 
999   /* NEUMANN PROBLEM */
1000   /* Matrix for Neumann problem is A_RR -> we need to create it */
1001   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1002   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1003   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1004   if (ibs != mbs) {
1005     Mat newmat;
1006     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1007     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1008     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1009   } else {
1010     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1011   }
1012   if (!pcbddc->ksp_R) { /* create object if not yet build */
1013     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1014     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1015     /* default */
1016     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1017     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1018     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1019     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1020     if (issbaij) {
1021       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1022     } else {
1023       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1024     }
1025     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1026   }
1027   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1028   /* Allow user's customization */
1029   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1030   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1031   if (!n_R) {
1032     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1033     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1034   }
1035   /* Set Up KSP for Neumann problem of BDDC */
1036   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1037 
1038   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1039 
1040   /* Dirichlet */
1041   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1042   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1043   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1044   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1045   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1046   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1047   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1048   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1049   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1050   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1051   /* need to be adapted? */
1052   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1053   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1054   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1055   /* print info */
1056   if (pcbddc->dbg_flag) {
1057     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1058     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1059     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1060     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1061     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1062   }
1063   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1064     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1065   }
1066 
1067   /* Neumann */
1068   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1069   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1070   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1071   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1072   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1073   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1074   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1075   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1076   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1077   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1078   /* need to be adapted? */
1079   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1080   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1081   /* print info */
1082   if (pcbddc->dbg_flag) {
1083     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1084     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1085   }
1086   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1087     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1088   }
1089   /* free Neumann problem's matrix */
1090   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1096 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1097 {
1098   PetscErrorCode ierr;
1099   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1100 
1101   PetscFunctionBegin;
1102   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1103   if (pcbddc->local_auxmat1) {
1104     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1105     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1106   }
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1112 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1113 {
1114   PetscErrorCode ierr;
1115   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1116   PC_IS*            pcis = (PC_IS*)  (pc->data);
1117   const PetscScalar zero = 0.0;
1118 
1119   PetscFunctionBegin;
1120   /* Application of PHI^T (or PSI^T)  */
1121   if (pcbddc->coarse_psi_B) {
1122     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1123     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1124   } else {
1125     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1126     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1127   }
1128   /* Scatter data of coarse_rhs */
1129   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1130   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1131 
1132   /* Local solution on R nodes */
1133   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1134   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1135   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136   if (pcbddc->switch_static) {
1137     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139   }
1140   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1141   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1142   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   if (pcbddc->switch_static) {
1145     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1146     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1147   }
1148 
1149   /* Coarse solution */
1150   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1151   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1152     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1153   }
1154   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156 
1157   /* Sum contributions from two levels */
1158   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1159   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1165 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1166 {
1167   PetscErrorCode ierr;
1168   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1169 
1170   PetscFunctionBegin;
1171   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1177 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1178 {
1179   PetscErrorCode ierr;
1180   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1181 
1182   PetscFunctionBegin;
1183   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 /* uncomment for testing purposes */
1188 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1189 #undef __FUNCT__
1190 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1191 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1192 {
1193   PetscErrorCode    ierr;
1194   PC_IS*            pcis = (PC_IS*)(pc->data);
1195   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1196   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1197   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1198   MatType           impMatType=MATSEQAIJ;
1199   /* one and zero */
1200   PetscScalar       one=1.0,zero=0.0;
1201   /* space to store constraints and their local indices */
1202   PetscScalar       *temp_quadrature_constraint;
1203   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1204   /* iterators */
1205   PetscInt          i,j,k,total_counts,temp_start_ptr;
1206   /* stuff to store connected components stored in pcbddc->mat_graph */
1207   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1208   PetscInt          n_ISForFaces,n_ISForEdges;
1209   /* near null space stuff */
1210   MatNullSpace      nearnullsp;
1211   const Vec         *nearnullvecs;
1212   Vec               *localnearnullsp;
1213   PetscBool         nnsp_has_cnst;
1214   PetscInt          nnsp_size;
1215   PetscScalar       *array;
1216   /* BLAS integers */
1217   PetscBLASInt      lwork,lierr;
1218   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1219   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1220   /* LAPACK working arrays for SVD or POD */
1221   PetscBool         skip_lapack;
1222   PetscScalar       *work;
1223   PetscReal         *singular_vals;
1224 #if defined(PETSC_USE_COMPLEX)
1225   PetscReal         *rwork;
1226 #endif
1227 #if defined(PETSC_MISSING_LAPACK_GESVD)
1228   PetscBLASInt      Blas_one_2=1;
1229   PetscScalar       *temp_basis,*correlation_mat;
1230 #else
1231   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1232   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1233 #endif
1234   /* change of basis */
1235   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1236   PetscBool         boolforchange,qr_needed;
1237   PetscBT           touched,change_basis,qr_needed_idx;
1238   /* auxiliary stuff */
1239   PetscInt          *nnz,*is_indices,*local_to_B;
1240   /* some quantities */
1241   PetscInt          n_vertices,total_primal_vertices;
1242   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1243 
1244 
1245   PetscFunctionBegin;
1246   /* Get index sets for faces, edges and vertices from graph */
1247   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1248     pcbddc->use_vertices = PETSC_TRUE;
1249   }
1250   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1251   /* HACK: provide functions to set change of basis */
1252   if (!ISForVertices && pcbddc->NullSpace) {
1253     pcbddc->use_change_of_basis = PETSC_TRUE;
1254     pcbddc->use_change_on_faces = PETSC_FALSE;
1255   }
1256   /* print some info */
1257   if (pcbddc->dbg_flag) {
1258     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1259     i = 0;
1260     if (ISForVertices) {
1261       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1266     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1267   }
1268   /* check if near null space is attached to global mat */
1269   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1270   if (nearnullsp) {
1271     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1272   } else { /* if near null space is not provided BDDC uses constants by default */
1273     nnsp_size = 0;
1274     nnsp_has_cnst = PETSC_TRUE;
1275   }
1276   /* get max number of constraints on a single cc */
1277   max_constraints = nnsp_size;
1278   if (nnsp_has_cnst) max_constraints++;
1279 
1280   /*
1281        Evaluate maximum storage size needed by the procedure
1282        - temp_indices will contain start index of each constraint stored as follows
1283        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1284        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1285        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1286                                                                                                                                                          */
1287   total_counts = n_ISForFaces+n_ISForEdges;
1288   total_counts *= max_constraints;
1289   n_vertices = 0;
1290   if (ISForVertices) {
1291     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1292   }
1293   total_counts += n_vertices;
1294   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1295   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1296   total_counts = 0;
1297   max_size_of_constraint = 0;
1298   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1299     if (i<n_ISForEdges) {
1300       used_IS = &ISForEdges[i];
1301     } else {
1302       used_IS = &ISForFaces[i-n_ISForEdges];
1303     }
1304     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1305     total_counts += j;
1306     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1307   }
1308   total_counts *= max_constraints;
1309   total_counts += n_vertices;
1310   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1311   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1312   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1313   /* local to boundary numbering */
1314   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1315   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1316   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1317   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1318   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1319   /* get local part of global near null space vectors */
1320   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1321   for (k=0;k<nnsp_size;k++) {
1322     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1323     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   }
1326 
1327   /* whether or not to skip lapack calls */
1328   skip_lapack = PETSC_TRUE;
1329   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1330 
1331   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1332   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1333     PetscScalar temp_work;
1334 #if defined(PETSC_MISSING_LAPACK_GESVD)
1335     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1336     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1337     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1338     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1339 #if defined(PETSC_USE_COMPLEX)
1340     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1341 #endif
1342     /* now we evaluate the optimal workspace using query with lwork=-1 */
1343     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1344     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1345     lwork = -1;
1346     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1347 #if !defined(PETSC_USE_COMPLEX)
1348     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1349 #else
1350     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1351 #endif
1352     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1353     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1354 #else /* on missing GESVD */
1355     /* SVD */
1356     PetscInt max_n,min_n;
1357     max_n = max_size_of_constraint;
1358     min_n = max_constraints;
1359     if (max_size_of_constraint < max_constraints) {
1360       min_n = max_size_of_constraint;
1361       max_n = max_constraints;
1362     }
1363     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1364 #if defined(PETSC_USE_COMPLEX)
1365     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1366 #endif
1367     /* now we evaluate the optimal workspace using query with lwork=-1 */
1368     lwork = -1;
1369     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1370     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1371     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1372     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1373 #if !defined(PETSC_USE_COMPLEX)
1374     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1375 #else
1376     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1377 #endif
1378     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1379     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1380 #endif /* on missing GESVD */
1381     /* Allocate optimal workspace */
1382     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1383     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1384   }
1385   /* Now we can loop on constraining sets */
1386   total_counts = 0;
1387   temp_indices[0] = 0;
1388   /* vertices */
1389   if (ISForVertices) {
1390     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1391     if (nnsp_has_cnst) { /* consider all vertices */
1392       for (i=0;i<n_vertices;i++) {
1393         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1394         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1395         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1396         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1397         total_counts++;
1398       }
1399     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1400       PetscBool used_vertex;
1401       for (i=0;i<n_vertices;i++) {
1402         used_vertex = PETSC_FALSE;
1403         k = 0;
1404         while (!used_vertex && k<nnsp_size) {
1405           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1406           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1407             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1408             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1409             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1410             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1411             total_counts++;
1412             used_vertex = PETSC_TRUE;
1413           }
1414           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1415           k++;
1416         }
1417       }
1418     }
1419     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1420     n_vertices = total_counts;
1421   }
1422 
1423   /* edges and faces */
1424   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1425     if (i<n_ISForEdges) {
1426       used_IS = &ISForEdges[i];
1427       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1428     } else {
1429       used_IS = &ISForFaces[i-n_ISForEdges];
1430       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1431     }
1432     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1433     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1434     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1435     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1436     /* change of basis should not be performed on local periodic nodes */
1437     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1438     if (nnsp_has_cnst) {
1439       PetscScalar quad_value;
1440       temp_constraints++;
1441       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1442       for (j=0;j<size_of_constraint;j++) {
1443         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1444         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1445         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1446       }
1447       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1448       if (boolforchange) {
1449         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1450       }
1451       total_counts++;
1452     }
1453     for (k=0;k<nnsp_size;k++) {
1454       PetscReal real_value;
1455       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1456       for (j=0;j<size_of_constraint;j++) {
1457         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1458         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1459         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1460       }
1461       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1462       /* check if array is null on the connected component */
1463       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1464       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1465       if (real_value > 0.0) { /* keep indices and values */
1466         temp_constraints++;
1467         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1468         if (boolforchange) {
1469           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1470         }
1471         total_counts++;
1472       }
1473     }
1474     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1475     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1476     if (!pcbddc->use_nnsp_true && temp_constraints) {
1477       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1478 
1479 #if defined(PETSC_MISSING_LAPACK_GESVD)
1480       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1481          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1482          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1483             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1484             from that computed using LAPACKgesvd
1485          -> This is due to a different computation of eigenvectors in LAPACKheev
1486          -> The quality of the POD-computed basis will be the same */
1487       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1488       /* Store upper triangular part of correlation matrix */
1489       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1490       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1491       for (j=0;j<temp_constraints;j++) {
1492         for (k=0;k<j+1;k++) {
1493           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1494         }
1495       }
1496       /* compute eigenvalues and eigenvectors of correlation matrix */
1497       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1498       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1499 #if !defined(PETSC_USE_COMPLEX)
1500       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1501 #else
1502       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1503 #endif
1504       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1505       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1506       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1507       j=0;
1508       while (j < temp_constraints && singular_vals[j] < tol) j++;
1509       total_counts=total_counts-j;
1510       /* scale and copy POD basis into used quadrature memory */
1511       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1512       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1513       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1514       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1515       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1516       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1517       if (j<temp_constraints) {
1518         PetscInt ii;
1519         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1520         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1521         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1522         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1523         for (k=0;k<temp_constraints-j;k++) {
1524           for (ii=0;ii<size_of_constraint;ii++) {
1525             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1526           }
1527         }
1528       }
1529 #else  /* on missing GESVD */
1530       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1531       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1532       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1533       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1534 #if !defined(PETSC_USE_COMPLEX)
1535       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1536 #else
1537       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1538 #endif
1539       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1540       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1541       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1542       k = temp_constraints;
1543       if (k > size_of_constraint) k = size_of_constraint;
1544       j = 0;
1545       while (j < k && singular_vals[k-j-1] < tol) j++;
1546       total_counts = total_counts-temp_constraints+k-j;
1547 #endif /* on missing GESVD */
1548     }
1549   }
1550   /* free index sets of faces, edges and vertices */
1551   for (i=0;i<n_ISForFaces;i++) {
1552     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1553   }
1554   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1555   for (i=0;i<n_ISForEdges;i++) {
1556     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1557   }
1558   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1559   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1560 
1561   /* free workspace */
1562   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1563     ierr = PetscFree(work);CHKERRQ(ierr);
1564 #if defined(PETSC_USE_COMPLEX)
1565     ierr = PetscFree(rwork);CHKERRQ(ierr);
1566 #endif
1567     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1568 #if defined(PETSC_MISSING_LAPACK_GESVD)
1569     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1570     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1571 #endif
1572   }
1573   for (k=0;k<nnsp_size;k++) {
1574     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1575   }
1576   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1577 
1578   /* set quantities in pcbddc data structure */
1579   /* n_vertices defines the number of subdomain corners in the primal space */
1580   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1581   pcbddc->local_primal_size = total_counts;
1582   pcbddc->n_vertices = n_vertices;
1583   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1584 
1585   /* Create constraint matrix */
1586   /* The constraint matrix is used to compute the l2g map of primal dofs */
1587   /* so we need to set it up properly either with or without change of basis */
1588   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1589   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1590   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1591   /* array to compute a local numbering of constraints : vertices first then constraints */
1592   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1593   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1594   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1595   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1596   /* auxiliary stuff for basis change */
1597   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1598   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1599 
1600   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1601   total_primal_vertices=0;
1602   for (i=0;i<pcbddc->local_primal_size;i++) {
1603     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1604     if (size_of_constraint == 1) {
1605       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1606       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1607       aux_primal_minloc[total_primal_vertices]=0;
1608       total_primal_vertices++;
1609     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1610       PetscInt min_loc,min_index;
1611       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1612       /* find first untouched local node */
1613       k = 0;
1614       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1615       min_index = global_indices[k];
1616       min_loc = k;
1617       /* search the minimum among global nodes already untouched on the cc */
1618       for (k=1;k<size_of_constraint;k++) {
1619         /* there can be more than one constraint on a single connected component */
1620         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1621           min_index = global_indices[k];
1622           min_loc = k;
1623         }
1624       }
1625       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1626       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1627       aux_primal_minloc[total_primal_vertices]=min_loc;
1628       total_primal_vertices++;
1629     }
1630   }
1631   /* determine if a QR strategy is needed for change of basis */
1632   qr_needed = PETSC_FALSE;
1633   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1634   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1635     if (PetscBTLookup(change_basis,i)) {
1636       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1637       j = 0;
1638       for (k=0;k<size_of_constraint;k++) {
1639         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1640           j++;
1641         }
1642       }
1643       /* found more than one primal dof on the cc */
1644       if (j > 1) {
1645         PetscBTSet(qr_needed_idx,i);
1646         qr_needed = PETSC_TRUE;
1647       }
1648     }
1649   }
1650   /* free workspace */
1651   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1652   /* permute indices in order to have a sorted set of vertices */
1653   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1654 
1655   /* nonzero structure of constraint matrix */
1656   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1657   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1658   j=total_primal_vertices;
1659   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1660     if (!PetscBTLookup(change_basis,i)) {
1661       nnz[j]=temp_indices[i+1]-temp_indices[i];
1662       j++;
1663     }
1664   }
1665   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1666   ierr = PetscFree(nnz);CHKERRQ(ierr);
1667   /* set values in constraint matrix */
1668   for (i=0;i<total_primal_vertices;i++) {
1669     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1670   }
1671   total_counts = total_primal_vertices;
1672   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1673     if (!PetscBTLookup(change_basis,i)) {
1674       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1675       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1676       total_counts++;
1677     }
1678   }
1679   /* assembling */
1680   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682   /*
1683   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1684   */
1685   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1686   if (pcbddc->use_change_of_basis) {
1687     /* dual and primal dofs on a single cc */
1688     PetscInt     dual_dofs,primal_dofs;
1689     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1690     PetscInt     primal_counter;
1691     /* working stuff for GEQRF */
1692     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1693     PetscBLASInt lqr_work;
1694     /* working stuff for UNGQR */
1695     PetscScalar  *gqr_work,lgqr_work_t;
1696     PetscBLASInt lgqr_work;
1697     /* working stuff for TRTRS */
1698     PetscScalar  *trs_rhs;
1699     PetscBLASInt Blas_NRHS;
1700     /* pointers for values insertion into change of basis matrix */
1701     PetscInt     *start_rows,*start_cols;
1702     PetscScalar  *start_vals;
1703     /* working stuff for values insertion */
1704     PetscBT      is_primal;
1705 
1706     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1707     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1708     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1709     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1710     /* work arrays */
1711     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1712     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1713     /* nonzeros per row */
1714     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1715       if (PetscBTLookup(change_basis,i)) {
1716         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1717         if (PetscBTLookup(qr_needed_idx,i)) {
1718           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1719         } else {
1720           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1721           /* get local primal index on the cc */
1722           j = 0;
1723           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1724           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1725         }
1726       }
1727     }
1728     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1729     ierr = PetscFree(nnz);CHKERRQ(ierr);
1730     /* Set initial identity in the matrix */
1731     for (i=0;i<pcis->n_B;i++) {
1732       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1733     }
1734 
1735     if (pcbddc->dbg_flag) {
1736       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1737       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1738     }
1739 
1740 
1741     /* Now we loop on the constraints which need a change of basis */
1742     /*
1743        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1744        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1745 
1746        Basic blocks of change of basis matrix T computed
1747 
1748           - Using the following block transformation if there is only a primal dof on the cc
1749             (in the example, primal dof is the last one of the edge in LOCAL ordering
1750              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1751             | 1        0   ...        0     1 |
1752             | 0        1   ...        0     1 |
1753             |              ...                |
1754             | 0        ...            1     1 |
1755             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1756 
1757           - via QR decomposition of constraints otherwise
1758     */
1759     if (qr_needed) {
1760       /* space to store Q */
1761       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1762       /* first we issue queries for optimal work */
1763       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1764       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1765       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1766       lqr_work = -1;
1767       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1768       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1769       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1770       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1771       lgqr_work = -1;
1772       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1773       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1774       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1775       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1776       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1777       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1778       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1779       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1780       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1781       /* array to store scaling factors for reflectors */
1782       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1783       /* array to store rhs and solution of triangular solver */
1784       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1785       /* allocating workspace for check */
1786       if (pcbddc->dbg_flag) {
1787         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1788       }
1789     }
1790     /* array to store whether a node is primal or not */
1791     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1792     for (i=0;i<total_primal_vertices;i++) {
1793       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1794     }
1795 
1796     /* loop on constraints and see whether or not they need a change of basis and compute it */
1797     /* -> using implicit ordering contained in temp_indices data */
1798     total_counts = pcbddc->n_vertices;
1799     primal_counter = total_counts;
1800     while (total_counts<pcbddc->local_primal_size) {
1801       primal_dofs = 1;
1802       if (PetscBTLookup(change_basis,total_counts)) {
1803         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1804         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1805           primal_dofs++;
1806         }
1807         /* get constraint info */
1808         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1809         dual_dofs = size_of_constraint-primal_dofs;
1810 
1811         if (pcbddc->dbg_flag) {
1812           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1813         }
1814 
1815         if (primal_dofs > 1) { /* QR */
1816 
1817           /* copy quadrature constraints for change of basis check */
1818           if (pcbddc->dbg_flag) {
1819             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1820           }
1821           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1822           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1823 
1824           /* compute QR decomposition of constraints */
1825           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1826           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1827           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1828           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1829           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1830           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1831           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1832 
1833           /* explictly compute R^-T */
1834           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1835           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1836           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1837           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1838           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1839           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1840           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1841           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1842           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1843           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1844 
1845           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1846           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1847           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1848           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1849           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1850           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1851           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1852           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1853           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1854 
1855           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1856              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1857              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1858           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1859           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1860           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1861           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1862           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1863           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1864           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1865           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1866           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1867           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1868 
1869           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1870           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1871           /* insert cols for primal dofs */
1872           for (j=0;j<primal_dofs;j++) {
1873             start_vals = &qr_basis[j*size_of_constraint];
1874             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1875             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1876           }
1877           /* insert cols for dual dofs */
1878           for (j=0,k=0;j<dual_dofs;k++) {
1879             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1880               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1881               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1882               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1883               j++;
1884             }
1885           }
1886 
1887           /* check change of basis */
1888           if (pcbddc->dbg_flag) {
1889             PetscInt   ii,jj;
1890             PetscBool valid_qr=PETSC_TRUE;
1891             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1892             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1893             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1894             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1895             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1896             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1897             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1898             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1899             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1900             for (jj=0;jj<size_of_constraint;jj++) {
1901               for (ii=0;ii<primal_dofs;ii++) {
1902                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1903                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1904               }
1905             }
1906             if (!valid_qr) {
1907               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1908               for (jj=0;jj<size_of_constraint;jj++) {
1909                 for (ii=0;ii<primal_dofs;ii++) {
1910                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1911                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1912                   }
1913                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1914                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1915                   }
1916                 }
1917               }
1918             } else {
1919               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1920             }
1921           }
1922         } else { /* simple transformation block */
1923           PetscInt row,col;
1924           PetscScalar val;
1925           for (j=0;j<size_of_constraint;j++) {
1926             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1927             if (!PetscBTLookup(is_primal,row)) {
1928               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1929               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1930               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1931             } else {
1932               for (k=0;k<size_of_constraint;k++) {
1933                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1934                 if (row != col) {
1935                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1936                 } else {
1937                   val = 1.0;
1938                 }
1939                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1940               }
1941             }
1942           }
1943         }
1944         /* increment primal counter */
1945         primal_counter += primal_dofs;
1946       } else {
1947         if (pcbddc->dbg_flag) {
1948           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1949         }
1950       }
1951       /* increment constraint counter total_counts */
1952       total_counts += primal_dofs;
1953     }
1954 
1955     /* free workspace */
1956     if (qr_needed) {
1957       if (pcbddc->dbg_flag) {
1958         ierr = PetscFree(work);CHKERRQ(ierr);
1959       }
1960       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1961       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1962       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1963       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1964       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1965     }
1966     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1967     /* assembling */
1968     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1969     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1970     /*
1971     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1972     */
1973   }
1974 
1975   /* flush dbg viewer */
1976   if (pcbddc->dbg_flag) {
1977     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1978   }
1979 
1980   /* free workspace */
1981   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1982   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1983   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1984   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1985   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1986   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1987   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1988   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1989   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1990   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1996 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1997 {
1998   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1999   PC_IS       *pcis = (PC_IS*)pc->data;
2000   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2001   PetscInt    bs,ierr,i,vertex_size;
2002   PetscViewer viewer=pcbddc->dbg_viewer;
2003 
2004   PetscFunctionBegin;
2005   /* Init local Graph struct */
2006   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2007 
2008   /* Check validity of the csr graph passed in by the user */
2009   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2010     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2011   }
2012 
2013   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2014   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2015     Mat mat_adj;
2016     const PetscInt *xadj,*adjncy;
2017     PetscBool flg_row=PETSC_TRUE;
2018 
2019     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2020     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2021     if (!flg_row) {
2022       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2023     }
2024     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2025     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2026     if (!flg_row) {
2027       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2028     }
2029     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2030   }
2031 
2032   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2033   vertex_size = 1;
2034   if (!pcbddc->user_provided_isfordofs) {
2035     if (!pcbddc->n_ISForDofs) {
2036       IS *custom_ISForDofs;
2037 
2038       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2039       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2040       for (i=0;i<bs;i++) {
2041         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2042       }
2043       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2044       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2045       /* remove my references to IS objects */
2046       for (i=0;i<bs;i++) {
2047         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2048       }
2049       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2050     }
2051   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2052     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2053   }
2054 
2055   /* Setup of Graph */
2056   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2057 
2058   /* Graph's connected components analysis */
2059   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2060 
2061   /* print some info to stdout */
2062   if (pcbddc->dbg_flag) {
2063     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2064   }
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2070 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2071 {
2072   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2073   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2074   PetscErrorCode ierr;
2075 
2076   PetscFunctionBegin;
2077   n = 0;
2078   vertices = 0;
2079   if (pcbddc->ConstraintMatrix) {
2080     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2081     for (i=0;i<local_primal_size;i++) {
2082       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2083       if (size_of_constraint == 1) n++;
2084       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2085     }
2086     if (vertices_idx) {
2087       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2088       n = 0;
2089       for (i=0;i<local_primal_size;i++) {
2090         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2091         if (size_of_constraint == 1) {
2092           vertices[n++]=row_cmat_indices[0];
2093         }
2094         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2095       }
2096     }
2097   }
2098   *n_vertices = n;
2099   if (vertices_idx) *vertices_idx = vertices;
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2105 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2106 {
2107   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2108   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2109   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2110   PetscBT        touched;
2111   PetscErrorCode ierr;
2112 
2113     /* This function assumes that the number of local constraints per connected component
2114        is not greater than the number of nodes defined for the connected component
2115        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2116   PetscFunctionBegin;
2117   n = 0;
2118   constraints_index = 0;
2119   if (pcbddc->ConstraintMatrix) {
2120     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2121     max_size_of_constraint = 0;
2122     for (i=0;i<local_primal_size;i++) {
2123       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2124       if (size_of_constraint > 1) {
2125         n++;
2126       }
2127       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2128       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2129     }
2130     if (constraints_idx) {
2131       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2132       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2133       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2134       n = 0;
2135       for (i=0;i<local_primal_size;i++) {
2136         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2137         if (size_of_constraint > 1) {
2138           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2139           /* find first untouched local node */
2140           j = 0;
2141           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2142           min_index = row_cmat_global_indices[j];
2143           min_loc = j;
2144           /* search the minimum among nodes not yet touched on the connected component
2145              since there can be more than one constraint on a single cc */
2146           for (j=1;j<size_of_constraint;j++) {
2147             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2148               min_index = row_cmat_global_indices[j];
2149               min_loc = j;
2150             }
2151           }
2152           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2153           constraints_index[n++] = row_cmat_indices[min_loc];
2154         }
2155         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2156       }
2157       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2158       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2159     }
2160   }
2161   *n_constraints = n;
2162   if (constraints_idx) *constraints_idx = constraints_index;
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 /* the next two functions has been adapted from pcis.c */
2167 #undef __FUNCT__
2168 #define __FUNCT__ "PCBDDCApplySchur"
2169 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2170 {
2171   PetscErrorCode ierr;
2172   PC_IS          *pcis = (PC_IS*)(pc->data);
2173 
2174   PetscFunctionBegin;
2175   if (!vec2_B) { vec2_B = v; }
2176   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2177   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2178   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2179   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2180   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 #undef __FUNCT__
2185 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2186 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2187 {
2188   PetscErrorCode ierr;
2189   PC_IS          *pcis = (PC_IS*)(pc->data);
2190 
2191   PetscFunctionBegin;
2192   if (!vec2_B) { vec2_B = v; }
2193   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2194   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2195   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2196   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2197   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 #undef __FUNCT__
2202 #define __FUNCT__ "PCBDDCSubsetNumbering"
2203 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2204 {
2205   Vec            local_vec,global_vec;
2206   IS             seqis,paris;
2207   VecScatter     scatter_ctx;
2208   PetscScalar    *array;
2209   PetscInt       *temp_global_dofs;
2210   PetscScalar    globalsum;
2211   PetscInt       i,j,s;
2212   PetscInt       nlocals,first_index,old_index,max_local;
2213   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2214   PetscMPIInt    *dof_sizes,*dof_displs;
2215   PetscBool      first_found;
2216   PetscErrorCode ierr;
2217 
2218   PetscFunctionBegin;
2219   /* mpi buffers */
2220   MPI_Comm_size(comm,&size_prec_comm);
2221   MPI_Comm_rank(comm,&rank_prec_comm);
2222   j = ( !rank_prec_comm ? size_prec_comm : 0);
2223   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2224   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2225   /* get maximum size of subset */
2226   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2227   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2228   max_local = 0;
2229   if (n_local_dofs) {
2230     max_local = temp_global_dofs[0];
2231     for (i=1;i<n_local_dofs;i++) {
2232       if (max_local < temp_global_dofs[i] ) {
2233         max_local = temp_global_dofs[i];
2234       }
2235     }
2236   }
2237   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2238   max_global++;
2239   max_local = 0;
2240   if (n_local_dofs) {
2241     max_local = local_dofs[0];
2242     for (i=1;i<n_local_dofs;i++) {
2243       if (max_local < local_dofs[i] ) {
2244         max_local = local_dofs[i];
2245       }
2246     }
2247   }
2248   max_local++;
2249   /* allocate workspace */
2250   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2251   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2252   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2253   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2254   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2255   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2256   /* create scatter */
2257   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2258   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2259   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2260   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2261   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2262   /* init array */
2263   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2264   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2265   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2266   if (local_dofs_mult) {
2267     for (i=0;i<n_local_dofs;i++) {
2268       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2269     }
2270   } else {
2271     for (i=0;i<n_local_dofs;i++) {
2272       array[local_dofs[i]]=1.0;
2273     }
2274   }
2275   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2276   /* scatter into global vec and get total number of global dofs */
2277   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2278   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2279   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2280   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2281   /* Fill global_vec with cumulative function for global numbering */
2282   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2283   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2284   nlocals = 0;
2285   first_index = -1;
2286   first_found = PETSC_FALSE;
2287   for (i=0;i<s;i++) {
2288     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2289       first_found = PETSC_TRUE;
2290       first_index = i;
2291     }
2292     nlocals += (PetscInt)PetscRealPart(array[i]);
2293   }
2294   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2295   if (!rank_prec_comm) {
2296     dof_displs[0]=0;
2297     for (i=1;i<size_prec_comm;i++) {
2298       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2299     }
2300   }
2301   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2302   if (first_found) {
2303     array[first_index] += (PetscScalar)nlocals;
2304     old_index = first_index;
2305     for (i=first_index+1;i<s;i++) {
2306       if (PetscRealPart(array[i]) > 0.0) {
2307         array[i] += array[old_index];
2308         old_index = i;
2309       }
2310     }
2311   }
2312   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2313   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2314   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2315   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2316   /* get global ordering of local dofs */
2317   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2318   if (local_dofs_mult) {
2319     for (i=0;i<n_local_dofs;i++) {
2320       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2321     }
2322   } else {
2323     for (i=0;i<n_local_dofs;i++) {
2324       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2325     }
2326   }
2327   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2328   /* free workspace */
2329   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2330   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2331   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2332   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2333   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2334   /* return pointer to global ordering of local dofs */
2335   *global_numbering_subset = temp_global_dofs;
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2341 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2342 {
2343   PetscInt       i,j;
2344   PetscScalar    *alphas;
2345   PetscErrorCode ierr;
2346 
2347   PetscFunctionBegin;
2348   /* this implements stabilized Gram-Schmidt */
2349   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2350   for (i=0;i<n;i++) {
2351     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2352     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2353     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2354   }
2355   ierr = PetscFree(alphas);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatConvert_IS_AIJ"
2362 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2363 {
2364   Mat new_mat;
2365   Mat_IS *matis = (Mat_IS*)(mat->data);
2366   /* info on mat */
2367   PetscInt bs,rows,cols;
2368   PetscInt lrows,lcols;
2369   PetscInt local_rows,local_cols;
2370   PetscMPIInt nsubdomains;
2371   /* preallocation */
2372   Vec vec_dnz,vec_onz;
2373   PetscScalar *my_dnz,*my_onz,*array;
2374   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2375   PetscInt  index_row,index_col,owner;
2376   PetscInt  *local_indices,*global_indices;
2377   /* work */
2378   PetscInt i,j;
2379   PetscErrorCode ierr;
2380 
2381   PetscFunctionBegin;
2382   /* CHECKS */
2383   /* get info from mat */
2384   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2385   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2386   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2387   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2388 
2389   /* MAT_INITIAL_MATRIX */
2390   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2391   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2392   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2393   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2394   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2395   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2396 
2397   /*
2398     preallocation
2399   */
2400 
2401   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2402   /*
2403      Some vectors are needed to sum up properly on shared interface dofs.
2404      Note that preallocation is not exact, since it overestimates nonzeros
2405   */
2406 /*
2407   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2408   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2409   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2410   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2411 */
2412   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2413   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2414   /* All processes need to compute entire row ownership */
2415   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2416   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2417   for (i=0;i<nsubdomains;i++) {
2418     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2419       row_ownership[j]=i;
2420     }
2421   }
2422   /* map indices of local mat to global values */
2423   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2424   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2425   for (i=0;i<local_rows;i++) local_indices[i]=i;
2426   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2427   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2428 
2429   /*
2430      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2431      then, they will be summed up properly. This way, preallocation is always sufficient
2432   */
2433   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2434   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2435   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2436   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2437   for (i=0;i<local_rows;i++) {
2438     index_row = global_indices[i];
2439     for (j=i;j<local_rows;j++) {
2440       owner = row_ownership[index_row];
2441       index_col = global_indices[j];
2442       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2443         my_dnz[i] += 1.0;
2444       } else { /* offdiag block */
2445         my_onz[i] += 1.0;
2446       }
2447       /* same as before, interchanging rows and cols */
2448       if (i != j) {
2449         owner = row_ownership[index_col];
2450         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2451           my_dnz[j] += 1.0;
2452         } else {
2453           my_onz[j] += 1.0;
2454         }
2455       }
2456     }
2457   }
2458   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2459   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2460   if (local_rows) { /* multilevel guard */
2461     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2462     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2463   }
2464   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2465   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2466   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2467   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2468   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2469   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2470   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2471 
2472   /* set computed preallocation in dnz and onz */
2473   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2474   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2475   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2476   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2477   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2478   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2479   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2480   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2481 
2482   /* Resize preallocation if overestimated */
2483   for (i=0;i<lrows;i++) {
2484     dnz[i] = PetscMin(dnz[i],lcols);
2485     onz[i] = PetscMin(onz[i],cols-lcols);
2486   }
2487   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2488   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2489 
2490   /*
2491     Set values. Very Basic.
2492   */
2493   for (i=0;i<local_rows;i++) {
2494     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2495     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2496     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2497     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2498     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2499   }
2500   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2501   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2502   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2503 
2504   /* get back mat */
2505   *M = new_mat;
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatISSubassemble_Private"
2511 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2512 {
2513   Mat             subdomain_adj;
2514   IS              new_ranks,ranks_send_to;
2515   MatPartitioning partitioner;
2516   Mat_IS          *matis;
2517   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2518   PetscInt        prank;
2519   PetscMPIInt     size,rank,color;
2520   PetscInt        *xadj,*adjncy,*oldranks;
2521   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2522   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2523   PetscErrorCode  ierr;
2524   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2525   PetscSubcomm    subcomm;
2526 
2527   PetscFunctionBegin;
2528   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2529   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2530   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2531 
2532   /* Get info on mapping */
2533   matis = (Mat_IS*)(mat->data);
2534   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2535   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2536 
2537   /* build local CSR graph of subdomains' connectivity */
2538   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2539   xadj[0] = 0;
2540   xadj[1] = PetscMax(n_neighs-1,0);
2541   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2542   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2543 
2544   if (threshold) {
2545     PetscInt* count,min_threshold;
2546     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2547     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2548     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2549       for (j=0;j<n_shared[i];j++) {
2550         count[shared[i][j]] += 1;
2551       }
2552     }
2553     /* adapt threshold since we dont want to lose significant connections */
2554     min_threshold = n_neighs;
2555     for (i=1;i<n_neighs;i++) {
2556       for (j=0;j<n_shared[i];j++) {
2557         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2558       }
2559     }
2560     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2561     threshold = PetscMax(min_threshold+1,threshold);
2562     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2563     xadj[1] = 0;
2564     for (i=1;i<n_neighs;i++) {
2565       for (j=0;j<n_shared[i];j++) {
2566         if (count[shared[i][j]] < threshold) {
2567           adjncy[xadj[1]] = neighs[i];
2568           adjncy_wgt[xadj[1]] = n_shared[i];
2569           xadj[1]++;
2570           break;
2571         }
2572       }
2573     }
2574     ierr = PetscFree(count);CHKERRQ(ierr);
2575   } else {
2576     if (xadj[1]) {
2577       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2578       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2579     }
2580   }
2581   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2582   if (use_square) {
2583     for (i=0;i<xadj[1];i++) {
2584       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2585     }
2586   }
2587   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2588 
2589   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2590 
2591   /*
2592     Restrict work on active processes only.
2593   */
2594   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2595   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2596   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2597   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2598   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2599   if (color) {
2600     ierr = PetscFree(xadj);CHKERRQ(ierr);
2601     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2602     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2603   } else {
2604     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2605     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2606     prank = rank;
2607     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2608     for (i=0;i<size;i++) {
2609       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2610     }
2611     for (i=0;i<xadj[1];i++) {
2612       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2613     }
2614     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2615     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2616     n_subdomains = (PetscInt)size;
2617     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2618 
2619     /* Partition */
2620     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2621     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2622     if (use_vwgt) {
2623       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2624       v_wgt[0] = local_size;
2625       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2626     }
2627     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2628     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2629     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2630     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2631     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2632 
2633     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2634     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2635     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2636     /* clean up */
2637     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2638     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2639     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2640     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2641   }
2642   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2643 
2644   /* assemble parallel IS for sends */
2645   i = 1;
2646   if (color) i=0;
2647   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2648   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2649 
2650   /* get back IS */
2651   *is_sends = ranks_send_to;
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2656 
2657 #undef __FUNCT__
2658 #define __FUNCT__ "MatISSubassemble"
2659 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2660 {
2661   Mat                    local_mat,new_mat;
2662   Mat_IS                 *matis;
2663   IS                     is_sends_internal;
2664   PetscInt               rows,cols;
2665   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2666   PetscBool              ismatis,isdense;
2667   ISLocalToGlobalMapping l2gmap;
2668   PetscInt*              l2gmap_indices;
2669   const PetscInt*        is_indices;
2670   MatType                new_local_type;
2671   MatTypePrivate         new_local_type_private;
2672   /* buffers */
2673   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2674   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2675   /* MPI */
2676   MPI_Comm               comm;
2677   PetscMPIInt            n_sends,n_recvs,commsize;
2678   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2679   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2680   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2681   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2682   PetscErrorCode         ierr;
2683 
2684   PetscFunctionBegin;
2685   /* checks */
2686   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2687   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2688   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2689   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2690   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2691   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2692   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2693   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2694   PetscValidLogicalCollectiveInt(mat,bs,0);
2695   /* prepare IS for sending if not provided */
2696   if (!is_sends) {
2697     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2698     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2699   } else {
2700     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2701     is_sends_internal = is_sends;
2702   }
2703 
2704   /* get pointer of MATIS data */
2705   matis = (Mat_IS*)mat->data;
2706 
2707   /* get comm */
2708   comm = PetscObjectComm((PetscObject)mat);
2709 
2710   /* compute number of sends */
2711   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2712   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2713 
2714   /* compute number of receives */
2715   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2716   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2717   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2718   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2719   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2720   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2721   ierr = PetscFree(iflags);CHKERRQ(ierr);
2722 
2723   /* prepare send/receive buffers */
2724   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2725   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2726   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2727   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2728 
2729   /* Get data from local mat */
2730   if (!isdense) {
2731     /* TODO: See below some guidelines on how to prepare the local buffers */
2732     /*
2733        send_buffer_vals should contain the raw values of the local matrix
2734        send_buffer_idxs should contain:
2735        - MatType_PRIVATE type
2736        - PetscInt        size_of_l2gmap
2737        - PetscInt        global_row_indices[size_of_l2gmap]
2738        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2739     */
2740   } else {
2741     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2742     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2743     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2744     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2745     send_buffer_idxs[1] = i;
2746     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2747     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2748     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2749     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2750     for (i=0;i<n_sends;i++) {
2751       ilengths_vals[is_indices[i]] = len*len;
2752       ilengths_idxs[is_indices[i]] = len+2;
2753     }
2754   }
2755   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2756   buf_size_idxs = 0;
2757   buf_size_vals = 0;
2758   for (i=0;i<n_recvs;i++) {
2759     buf_size_idxs += (PetscInt)olengths_idxs[i];
2760     buf_size_vals += (PetscInt)olengths_vals[i];
2761   }
2762   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2763   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2764 
2765   /* get new tags for clean communications */
2766   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2767   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2768 
2769   /* allocate for requests */
2770   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2771   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2772   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2773   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2774 
2775   /* communications */
2776   ptr_idxs = recv_buffer_idxs;
2777   ptr_vals = recv_buffer_vals;
2778   for (i=0;i<n_recvs;i++) {
2779     source_dest = onodes[i];
2780     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2781     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2782     ptr_idxs += olengths_idxs[i];
2783     ptr_vals += olengths_vals[i];
2784   }
2785   for (i=0;i<n_sends;i++) {
2786     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2787     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2788     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2789   }
2790   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2791   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2792 
2793   /* assemble new l2g map */
2794   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2795   ptr_idxs = recv_buffer_idxs;
2796   buf_size_idxs = 0;
2797   for (i=0;i<n_recvs;i++) {
2798     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2799     ptr_idxs += olengths_idxs[i];
2800   }
2801   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2802   ptr_idxs = recv_buffer_idxs;
2803   buf_size_idxs = 0;
2804   for (i=0;i<n_recvs;i++) {
2805     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2806     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2807     ptr_idxs += olengths_idxs[i];
2808   }
2809   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2810   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2811   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2812 
2813   /* infer new local matrix type from received local matrices type */
2814   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2815   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2816   new_local_type_private = MATAIJ_PRIVATE;
2817   new_local_type = MATSEQAIJ;
2818   if (n_recvs) {
2819     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2820     ptr_idxs = recv_buffer_idxs;
2821     for (i=0;i<n_recvs;i++) {
2822       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2823         new_local_type_private = MATAIJ_PRIVATE;
2824         break;
2825       }
2826       ptr_idxs += olengths_idxs[i];
2827     }
2828     switch (new_local_type_private) {
2829       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2830         new_local_type = MATSEQAIJ;
2831         bs = 1;
2832         break;
2833       case MATAIJ_PRIVATE:
2834         new_local_type = MATSEQAIJ;
2835         bs = 1;
2836         break;
2837       case MATBAIJ_PRIVATE:
2838         new_local_type = MATSEQBAIJ;
2839         break;
2840       case MATSBAIJ_PRIVATE:
2841         new_local_type = MATSEQSBAIJ;
2842         break;
2843       default:
2844         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2845         break;
2846     }
2847   }
2848 
2849   /* create MATIS object */
2850   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2851   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2852   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2853   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2854   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2855   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2856 
2857   /* set values */
2858   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2859   ptr_vals = recv_buffer_vals;
2860   ptr_idxs = recv_buffer_idxs;
2861   for (i=0;i<n_recvs;i++) {
2862     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2863       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2864       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2865       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2866       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2867       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2868     }
2869     ptr_idxs += olengths_idxs[i];
2870     ptr_vals += olengths_vals[i];
2871   }
2872   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2873   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2874   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2875   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2876 
2877   { /* check */
2878     Vec       lvec,rvec;
2879     PetscReal infty_error;
2880 
2881     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2882     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2883     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2884     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2885     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2886     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2887     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2888     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2889     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2890   }
2891 
2892   /* free workspace */
2893   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2894   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2895   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2896   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2897   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2898   if (isdense) {
2899     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2900     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2901   } else {
2902     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2903   }
2904   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2905   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2906   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2907   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2908   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2909   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2910   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2911   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2912   ierr = PetscFree(onodes);CHKERRQ(ierr);
2913   /* get back new mat */
2914   *mat_n = new_mat;
2915   PetscFunctionReturn(0);
2916 }
2917 
2918 #undef __FUNCT__
2919 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2920 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2921 {
2922   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2923   PC_IS                  *pcis = (PC_IS*)pc->data;
2924   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2925   MatNullSpace           CoarseNullSpace=NULL;
2926   ISLocalToGlobalMapping coarse_islg;
2927   IS                     coarse_is;
2928   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2929   PetscInt               im_active=-1,active_procs=-1;
2930   PC                     pc_temp;
2931   PCType                 coarse_pc_type;
2932   KSPType                coarse_ksp_type;
2933   PetscBool              multilevel_requested,multilevel_allowed;
2934   PetscBool              setsym,issym,isbddc,isnn;
2935   MatStructure           matstruct;
2936   PetscErrorCode         ierr;
2937 
2938   PetscFunctionBegin;
2939   /* Assign global numbering to coarse dofs */
2940   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2941 
2942   /* infer some info from user */
2943   issym = PETSC_FALSE;
2944   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2945   multilevel_allowed = PETSC_FALSE;
2946   multilevel_requested = PETSC_FALSE;
2947   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2948   if (multilevel_requested) {
2949     /* count "active processes" */
2950     im_active = !!(pcis->n);
2951     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2952     if (active_procs/pcbddc->coarsening_ratio < 2) {
2953       multilevel_allowed = PETSC_FALSE;
2954     } else {
2955       multilevel_allowed = PETSC_TRUE;
2956     }
2957   }
2958 
2959   /* set defaults for coarse KSP and PC */
2960   if (multilevel_allowed) {
2961     if (issym) {
2962       coarse_ksp_type = KSPRICHARDSON;
2963     } else {
2964       coarse_ksp_type = KSPCHEBYSHEV;
2965     }
2966     coarse_pc_type = PCBDDC;
2967   } else {
2968     coarse_ksp_type = KSPPREONLY;
2969     coarse_pc_type = PCREDUNDANT;
2970   }
2971 
2972   /* create the coarse KSP object only once with defaults */
2973   if (!pcbddc->coarse_ksp) {
2974     char prefix[256],str_level[3];
2975     size_t len;
2976     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2977     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2978     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2979     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2980     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2981     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2982     /* prefix */
2983     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2984     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2985     if (!pcbddc->current_level) {
2986       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2987       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2988     } else {
2989       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2990       if (pcbddc->current_level>1) len -= 2;
2991       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
2992       *(prefix+len)='\0';
2993       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
2994       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
2995     }
2996     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
2997   }
2998   /* allow user customization */
2999   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3000   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3001   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3002 
3003   /* get some info after set from options */
3004   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3005   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3006   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3007   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3008     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3009     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3010     isbddc = PETSC_FALSE;
3011   }
3012 
3013   /* propagate BDDC info to the next level */
3014   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3015   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3016   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3017 
3018   /* creates temporary MATIS object for coarse matrix */
3019   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3020   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3021   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3022   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3023   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3024   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3026   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3027   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3028   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3029 
3030   /* assemble coarse matrix */
3031   if (isbddc || isnn) {
3032     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3033   } else {
3034     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3035   }
3036   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3037 
3038   /* create local to global scatters for coarse problem */
3039   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3040   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3041   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3042 
3043   /* propagate symmetry info to coarse matrix */
3044   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3045 
3046   /* Compute coarse null space (special handling by BDDC only) */
3047   if (pcbddc->NullSpace) {
3048     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3049     if (isbddc) {
3050       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3051     } else {
3052       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3053     }
3054   }
3055 
3056   /* set operators */
3057   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3058   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3059 
3060   /* additional KSP customization */
3061   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3062   if (max_it < 5) {
3063     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3064   }
3065   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3066 
3067 
3068   /* print some info if requested */
3069   if (pcbddc->dbg_flag) {
3070     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3071     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3072     if (!multilevel_allowed) {
3073       if (multilevel_requested) {
3074         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3075       } else if (pcbddc->max_levels) {
3076         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3077       }
3078     }
3079     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3080     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3081   }
3082 
3083   /* setup coarse ksp */
3084   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3085   if (pcbddc->dbg_flag) {
3086     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3087     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3088     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3089     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3090   }
3091 
3092   /* Check coarse problem if requested */
3093   if (pcbddc->dbg_flag) {
3094     KSP       check_ksp;
3095     KSPType   check_ksp_type;
3096     PC        check_pc;
3097     Vec       check_vec;
3098     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3099     PetscInt  its;
3100     PetscBool ispreonly,compute;
3101 
3102     /* Create ksp object suitable for estimation of extreme eigenvalues */
3103     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3104     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3105     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3106     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3107     if (ispreonly) {
3108       check_ksp_type = KSPPREONLY;
3109       compute = PETSC_FALSE;
3110     } else {
3111       if (issym) check_ksp_type = KSPCG;
3112       else check_ksp_type = KSPGMRES;
3113       compute = PETSC_TRUE;
3114     }
3115     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3116     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3117     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3118     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3119     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3120     /* create random vec */
3121     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3122     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3123     if (CoarseNullSpace) {
3124       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3125     }
3126     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3127     /* solve coarse problem */
3128     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3129     if (CoarseNullSpace) {
3130       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3131     }
3132     /* check coarse problem residual error */
3133     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3134     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3135     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3136     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3137     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3138     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3139     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3140     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3141     /* get eigenvalue estimation if preonly has not been requested */
3142     if (!ispreonly) {
3143       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3144       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3145       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3146     }
3147     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3148     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3149   }
3150   /* free memory */
3151   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3152   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
3155 
3156 #undef __FUNCT__
3157 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3158 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3159 {
3160   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3161   PC_IS*         pcis = (PC_IS*)pc->data;
3162   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3163   PetscInt       i,j,coarse_size;
3164   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3165   PetscErrorCode ierr;
3166 
3167   PetscFunctionBegin;
3168   /* get indices in local ordering for vertices and constraints */
3169   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3170   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3171   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3172   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3173   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3174   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3175   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3176 
3177   /* Compute global number of coarse dofs */
3178   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3179 
3180   /* check numbering */
3181   if (pcbddc->dbg_flag) {
3182     PetscScalar coarsesum,*array;
3183 
3184     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3185     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3186     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3187     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3188     for (i=0;i<pcbddc->local_primal_size;i++) {
3189       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3190     }
3191     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3192     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3193     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3194     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3195     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3196     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3197     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3198     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3199     for (i=0;i<pcis->n;i++) {
3200       if (array[i] == 1.0) {
3201         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3202       }
3203     }
3204     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3205     for (i=0;i<pcis->n;i++) {
3206       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3207     }
3208     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3209     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3210     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3211     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3212     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3213     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3214     if (pcbddc->dbg_flag > 1) {
3215       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3216       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3217       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3218       for (i=0;i<pcbddc->local_primal_size;i++) {
3219         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3220       }
3221       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3222     }
3223     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3224   }
3225   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3226   /* get back data */
3227   *coarse_size_n = coarse_size;
3228   *local_primal_indices_n = local_primal_indices;
3229   PetscFunctionReturn(0);
3230 }
3231 
3232