xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision eb97c9d281e1e6b405ea8e13f1ece24889ffc2d3)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
166   /* Vector and matrix types */
167   VecType                impVecType;
168   MatType                impMatType;
169   /* some shortcuts to scalars */
170   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
171   /* for debugging purposes */
172   PetscReal              *coarsefunctions_errors,*constraints_errors;
173 
174   PetscFunctionBegin;
175   /* get number of vertices and their local indices */
176   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
177   n_constraints = pcbddc->local_primal_size-n_vertices;
178   /* Set Non-overlapping dimensions */
179   n_B = pcis->n_B; n_D = pcis->n - n_B;
180   n_R = pcis->n-n_vertices;
181 
182   /* Set types for local objects needed by BDDC precondtioner */
183   impMatType = MATSEQDENSE;
184   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
185 
186   /* Allocating some extra storage just to be safe */
187   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
188   for (i=0;i<pcis->n;i++) auxindices[i]=i;
189 
190   /* vertices in boundary numbering */
191   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
193   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
194   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
195   if (i != n_vertices) {
196     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
197   }
198 
199   /* some work vectors on vertices and/or constraints */
200   if (n_vertices) {
201     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
202     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
203     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
204     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
205   }
206   if (n_constraints) {
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
208     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
209   }
210 
211   /* Precompute stuffs needed for preprocessing and application of BDDC*/
212   if (n_constraints) {
213     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
214     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
215     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
216     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
217 
218     /* Extract constraints on R nodes: C_{CR}  */
219     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
220     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
221     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
222 
223     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
224     for (i=0;i<n_constraints;i++) {
225       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
226       /* Get row of constraint matrix in R numbering */
227       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
228       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
229       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
230       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
231       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
232       /* Solve for row of constraint matrix in R numbering */
233       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
234       /* Set values in local_auxmat2 */
235       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
236       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
237       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
238     }
239     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
241     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
242 
243     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
244     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
245     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
246     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
247     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
248     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
249     ierr = MatSetUp(M1);CHKERRQ(ierr);
250     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
251     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
252     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
253     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
254     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
255     ierr = MatDestroy(&M2);CHKERRQ(ierr);
256     ierr = MatDestroy(&M3);CHKERRQ(ierr);
257     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
258     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
259   }
260 
261   /* Get submatrices from subdomain matrix */
262   if (n_vertices) {
263     /* PetscInt ibs,iibs,mbs; */
264     PetscBool issbaij,isbaij;
265 
266     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
267     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
268     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
269     if (issbaij || isbaij) {
270       Mat newmat;
271       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
273       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
274       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
275       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
276     } else {
277       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
278       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
279       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
280     }
281 /*
282     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
283     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
284     ierr = ISGetBlockSize(is_aux,&iibs);CHKERRQ(ierr);
285     if (ibs != mbs || iibs != mbs) {
286       Mat newmat;
287       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
288       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
289       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
290       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
291       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
292     } else {
293       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
294       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
295       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
296     }
297 */
298     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
299   }
300 
301   /* Matrix of coarse basis functions (local) */
302   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
303   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
304   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
305   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
306   if (pcbddc->switch_static || pcbddc->dbg_flag) {
307     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
308     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
309     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
310     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
311   }
312 
313   if (pcbddc->dbg_flag) {
314     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
315     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
316     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
317   }
318   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
319   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
320 
321   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
322 
323   /* vertices */
324   for (i=0;i<n_vertices;i++) {
325     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
326     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
327     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
328     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
329     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
330     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
331     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
332     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
333     if (n_constraints) {
334       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
335       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
336       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
337     }
338     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
339     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
340 
341     /* Set values in coarse basis function and subdomain part of coarse_mat */
342     /* coarse basis functions */
343     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
344     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
345     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
346     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
347     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
348     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
349     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
350     if (pcbddc->switch_static || pcbddc->dbg_flag) {
351       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
352       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
353       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
354       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
355       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
356     }
357     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
358     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
359     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
360     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
361     if (n_constraints) {
362       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
363       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
364       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
365     }
366 
367     /* check */
368     if (pcbddc->dbg_flag) {
369       /* assemble subdomain vector on local nodes */
370       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
371       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
372       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
373       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
374       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
375       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
376       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
377       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
378       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
379       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
380       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
381       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
382       if (n_constraints) {
383         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
384         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
385         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
386       }
387       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
389       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
390       /* check saddle point solution */
391       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
392       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
393       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
394       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
395       /* shift by the identity matrix */
396       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
397       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
398       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
399       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
400     }
401   }
402 
403   /* constraints */
404   for (i=0;i<n_constraints;i++) {
405     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
406     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
407     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
408     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
409     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
410     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
411     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
412     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
413     if (n_vertices) {
414       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
415     }
416     /* Set values in coarse basis function and subdomain part of coarse_mat */
417     /* coarse basis functions */
418     j = i+n_vertices; /* don't touch this! */
419     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
420     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
421     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
422     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
423     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
424     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
425     if (pcbddc->switch_static || pcbddc->dbg_flag) {
426       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
427       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
428       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
429       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
430       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
431     }
432     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
433     if (n_vertices) {
434       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
435       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
437     }
438     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
439     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
440     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
441 
442     if (pcbddc->dbg_flag) {
443       /* assemble subdomain vector on nodes */
444       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
445       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
446       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
447       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
448       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
449       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
450       /* assemble subdomain vector of lagrange multipliers */
451       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
452       if (n_vertices) {
453         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
454         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
455         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
456       }
457       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
458       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
459       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
460       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
462       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
463       /* check saddle point solution */
464       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
465       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
466       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
467       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
468       /* shift by the identity matrix */
469       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
470       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
471       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
472       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
473     }
474   }
475   /* call assembling routines for local coarse basis */
476   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
477   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
478   if (pcbddc->switch_static || pcbddc->dbg_flag) {
479     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
480     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
481   }
482 
483   /* compute other basis functions for non-symmetric problems */
484   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
485   if (!setsym || (setsym && !issym)) {
486     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
487     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
488     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
489     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
490     if (pcbddc->switch_static || pcbddc->dbg_flag) {
491       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
492       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
493       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
494       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
495     }
496     for (i=0;i<pcbddc->local_primal_size;i++) {
497       if (n_constraints) {
498         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
499         for (j=0;j<n_constraints;j++) {
500           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
501         }
502         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
503         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
504       }
505       if (i<n_vertices) {
506         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
507         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
509         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
510         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
511         if (n_constraints) {
512           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
513         }
514       } else {
515         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
516       }
517       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
518       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
519       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
520       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
521       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
522       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
523       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
524       if (i<n_vertices) {
525         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
526       }
527       if (pcbddc->switch_static || pcbddc->dbg_flag) {
528         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
529         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
530         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
531         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
532         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
533       }
534 
535       if (pcbddc->dbg_flag) {
536         /* assemble subdomain vector on nodes */
537         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
538         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
539         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
540         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
541         if (i<n_vertices) {
542           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
543         }
544         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
545         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
546         /* assemble subdomain vector of lagrange multipliers */
547         for (j=0;j<pcbddc->local_primal_size;j++) {
548           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
549         }
550         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
551         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
552         /* check saddle point solution */
553         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
554         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
555         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
556         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
557         /* shift by the identity matrix */
558         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
559         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
560         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
561         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
562       }
563     }
564     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
565     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566     if (pcbddc->switch_static || pcbddc->dbg_flag) {
567       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
568       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
569     }
570   }
571   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
572   /* Checking coarse_sub_mat and coarse basis functios */
573   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
574   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
575   if (pcbddc->dbg_flag) {
576     Mat         coarse_sub_mat;
577     Mat         AUXMAT,TM1,TM2,TM3,TM4;
578     Mat         coarse_phi_D,coarse_phi_B;
579     Mat         coarse_psi_D,coarse_psi_B;
580     Mat         A_II,A_BB,A_IB,A_BI;
581     MatType     checkmattype=MATSEQAIJ;
582     PetscReal   real_value;
583 
584     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
585     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
586     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
587     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
588     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
589     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
590     if (pcbddc->coarse_psi_B) {
591       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
592       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
593     }
594     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
595 
596     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
597     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
598     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
599     if (pcbddc->coarse_psi_B) {
600       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
601       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
602       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
603       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
604       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
605       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
606       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
607       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
608       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
609       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
610       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
611       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
612     } else {
613       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
614       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
615       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
616       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
617       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
618       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
619       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
620       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
621     }
622     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
623     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
624     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
625     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
626     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
627     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
628     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
629     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
630     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
631     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
632     for (i=0;i<pcbddc->local_primal_size;i++) {
633       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
634     }
635     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
636     for (i=0;i<pcbddc->local_primal_size;i++) {
637       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
638     }
639     if (pcbddc->coarse_psi_B) {
640       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
641       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
642         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
643       }
644       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
645       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
646         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
647       }
648     }
649     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
652     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
653     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
654     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
655     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
656     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
657     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
658     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
659     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
660     if (pcbddc->coarse_psi_B) {
661       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
662       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
663     }
664     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
665     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
666     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
667     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
668   }
669   /* free memory */
670   if (n_vertices) {
671     ierr = PetscFree(vertices);CHKERRQ(ierr);
672     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
673     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
674     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
675     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
676     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
677   }
678   if (n_constraints) {
679     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
680     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
681     ierr = MatDestroy(&M1);CHKERRQ(ierr);
682     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
683   }
684   ierr = PetscFree(auxindices);CHKERRQ(ierr);
685   /* get back data */
686   *coarse_submat_vals_n = coarse_submat_vals;
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
692 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
693 {
694   PC_IS*            pcis = (PC_IS*)(pc->data);
695   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
696   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
697   PetscBool         issbaij,isbaij;
698   /* manage repeated solves */
699   MatReuse          reuse;
700   MatStructure      matstruct;
701   PetscErrorCode    ierr;
702 
703   PetscFunctionBegin;
704   /* get mat flags */
705   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
706   reuse = MAT_INITIAL_MATRIX;
707   if (pc->setupcalled) {
708     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
709     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
710     if (matstruct == SAME_NONZERO_PATTERN) {
711       reuse = MAT_REUSE_MATRIX;
712     } else {
713       reuse = MAT_INITIAL_MATRIX;
714     }
715   }
716   if (reuse == MAT_INITIAL_MATRIX) {
717     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
718     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
719     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
720     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
721     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
722   }
723 
724   /* transform local matrices if needed */
725   if (pcbddc->use_change_of_basis) {
726     Mat         change_mat_all;
727     PetscScalar *row_cmat_values;
728     PetscInt    *row_cmat_indices;
729     PetscInt    *nnz,*is_indices,*temp_indices;
730     PetscInt    i,j,k,n_D,n_B;
731 
732     /* Get Non-overlapping dimensions */
733     n_B = pcis->n_B;
734     n_D = pcis->n-n_B;
735 
736     /* compute nonzero structure of change of basis on all local nodes */
737     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
738     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
740     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
742     k=1;
743     for (i=0;i<n_B;i++) {
744       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
745       nnz[is_indices[i]]=j;
746       if (k < j) k = j;
747       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
748     }
749     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
750     /* assemble change of basis matrix on the whole set of local dofs */
751     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
752     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
753     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
754     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
755     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
756     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
757     for (i=0;i<n_D;i++) {
758       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
759     }
760     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
761     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
762     for (i=0;i<n_B;i++) {
763       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
764       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
765       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
766       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
767     }
768     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
769     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
771     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
772     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
773     if (!issbaij && !isbaij) {
774       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
775     } else {
776       Mat work_mat;
777       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
778       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
779       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
780     }
781     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
782     ierr = PetscFree(nnz);CHKERRQ(ierr);
783     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
784   } else {
785     /* without change of basis, the local matrix is unchanged */
786     if (!pcbddc->local_mat) {
787       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
788       pcbddc->local_mat = matis->A;
789     }
790   }
791 
792   /* get submatrices */
793   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
794   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
795   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
796   if (!issbaij) {
797     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
798     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
799   } else {
800     Mat newmat;
801     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
802     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
803     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
804     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
811 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
812 {
813   PC_IS*         pcis = (PC_IS*)(pc->data);
814   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
815   IS             is_aux1,is_aux2;
816   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
817   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
818   PetscBT        bitmask;
819   PetscErrorCode ierr;
820 
821   PetscFunctionBegin;
822   /* Set Non-overlapping dimensions */
823   n_B = pcis->n_B; n_D = pcis->n - n_B;
824   /* get vertex indices from constraint matrix */
825   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
826   /* Set number of constraints */
827   n_constraints = pcbddc->local_primal_size-n_vertices;
828   /* create auxiliary bitmask */
829   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
830   for (i=0;i<n_vertices;i++) {
831     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
832   }
833   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
834   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
835   for (i=0, n_R=0; i<pcis->n; i++) {
836     if (!PetscBTLookup(bitmask,i)) {
837       idx_R_local[n_R] = i;
838       n_R++;
839     }
840   }
841   ierr = PetscFree(vertices);CHKERRQ(ierr);
842   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
843 
844   /* print some info if requested */
845   if (pcbddc->dbg_flag) {
846     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
847     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
848     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
849     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
850     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
851     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
852     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
853   }
854 
855   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
856   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
857   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
858   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
859   for (i=0; i<n_D; i++) {
860     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
861   }
862   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
863   for (i=0, j=0; i<n_R; i++) {
864     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
865       aux_array1[j++] = i;
866     }
867   }
868   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
869   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
870   for (i=0, j=0; i<n_B; i++) {
871     if (!PetscBTLookup(bitmask,is_indices[i])) {
872       aux_array2[j++] = i;
873     }
874   }
875   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
876   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
877   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
878   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
879   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
880 
881   if (pcbddc->switch_static || pcbddc->dbg_flag) {
882     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
883     for (i=0, j=0; i<n_R; i++) {
884       if (PetscBTLookup(bitmask,idx_R_local[i])) {
885         aux_array1[j++] = i;
886       }
887     }
888     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
889     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
890     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
891   }
892   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
893   PetscFunctionReturn(0);
894 }
895 
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
899 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
900 {
901   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
902   PC_IS          *pcis = (PC_IS*)pc->data;
903   PC             pc_temp;
904   Mat            A_RR;
905   Vec            vec1,vec2,vec3;
906   MatStructure   matstruct;
907   PetscScalar    m_one = -1.0;
908   PetscReal      value;
909   PetscInt       n_D,n_R,ibs,mbs;
910   PetscBool      use_exact,use_exact_reduced,issbaij;
911   PetscErrorCode ierr;
912   /* prefixes stuff */
913   char           dir_prefix[256],neu_prefix[256],str_level[3];
914   size_t         len;
915 
916   PetscFunctionBegin;
917   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
918 
919   /* compute prefixes */
920   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
921   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
922   if (!pcbddc->current_level) {
923     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
924     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
925     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
926     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
927   } else {
928     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
929     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
930     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
931     len -= 15; /* remove "pc_bddc_coarse_" */
932     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
933     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
934     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
935     *(dir_prefix+len)='\0';
936     *(neu_prefix+len)='\0';
937     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
938     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
939     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
940     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
941   }
942 
943   /* DIRICHLET PROBLEM */
944   /* Matrix for Dirichlet problem is pcis->A_II */
945   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
946   if (!pcbddc->ksp_D) { /* create object if not yet build */
947     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
948     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
949     /* default */
950     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
951     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
952     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
953     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
954     if (issbaij) {
955       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
956     } else {
957       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
958     }
959     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
960   }
961   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
962   /* Allow user's customization */
963   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
964   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
965   if (!n_D) {
966     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
967     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
968   }
969   /* Set Up KSP for Dirichlet problem of BDDC */
970   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
971   /* set ksp_D into pcis data */
972   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
973   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
974   pcis->ksp_D = pcbddc->ksp_D;
975 
976   /* NEUMANN PROBLEM */
977   /* Matrix for Neumann problem is A_RR -> we need to create it */
978   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
979   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
980   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
981   if (ibs != mbs) { /* TODO BAIJ: see if this can be avoided */
982     Mat newmat;
983     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
984     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
985     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
986   } else {
987     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
988   }
989   if (!pcbddc->ksp_R) { /* create object if not yet build */
990     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
991     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
992     /* default */
993     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
994     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
995     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
996     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
997     if (issbaij) {
998       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
999     } else {
1000       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1001     }
1002     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1003   }
1004   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1005   /* Allow user's customization */
1006   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1007   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1008   if (!n_R) {
1009     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1010     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1011   }
1012   /* Set Up KSP for Neumann problem of BDDC */
1013   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1014 
1015   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1016 
1017   /* Dirichlet */
1018   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1019   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1020   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1021   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1022   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1023   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1024   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1025   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1026   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1027   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1028   /* need to be adapted? */
1029   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1030   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1031   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1032   /* print info */
1033   if (pcbddc->dbg_flag) {
1034     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1035     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1036     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1037     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1038     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1039   }
1040   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1041     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1042   }
1043 
1044   /* Neumann */
1045   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1046   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1047   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1048   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1049   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1050   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1051   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1052   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1053   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1054   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1055   /* need to be adapted? */
1056   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1057   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1058   /* print info */
1059   if (pcbddc->dbg_flag) {
1060     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1061     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1062   }
1063   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1064     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1065   }
1066   /* free Neumann problem's matrix */
1067   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1073 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1074 {
1075   PetscErrorCode ierr;
1076   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1077 
1078   PetscFunctionBegin;
1079   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1080   if (pcbddc->local_auxmat1) {
1081     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1082     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1089 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1090 {
1091   PetscErrorCode ierr;
1092   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1093   PC_IS*            pcis = (PC_IS*)  (pc->data);
1094   const PetscScalar zero = 0.0;
1095 
1096   PetscFunctionBegin;
1097   /* Application of PHI^T (or PSI^T)  */
1098   if (pcbddc->coarse_psi_B) {
1099     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1100     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1101   } else {
1102     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1103     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1104   }
1105   /* Scatter data of coarse_rhs */
1106   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1107   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108 
1109   /* Local solution on R nodes */
1110   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1111   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1113   if (pcbddc->switch_static) {
1114     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1115     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   }
1117   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1118   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1119   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121   if (pcbddc->switch_static) {
1122     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1123     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1124   }
1125 
1126   /* Coarse solution */
1127   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1128   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1129     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1130   }
1131   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1132   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133 
1134   /* Sum contributions from two levels */
1135   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1136   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1142 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1143 {
1144   PetscErrorCode ierr;
1145   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1146 
1147   PetscFunctionBegin;
1148   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1154 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1155 {
1156   PetscErrorCode ierr;
1157   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1158 
1159   PetscFunctionBegin;
1160   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /* uncomment for testing purposes */
1165 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1166 #undef __FUNCT__
1167 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1168 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1169 {
1170   PetscErrorCode    ierr;
1171   PC_IS*            pcis = (PC_IS*)(pc->data);
1172   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1173   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1174   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1175   MatType           impMatType=MATSEQAIJ;
1176   /* one and zero */
1177   PetscScalar       one=1.0,zero=0.0;
1178   /* space to store constraints and their local indices */
1179   PetscScalar       *temp_quadrature_constraint;
1180   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1181   /* iterators */
1182   PetscInt          i,j,k,total_counts,temp_start_ptr;
1183   /* stuff to store connected components stored in pcbddc->mat_graph */
1184   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1185   PetscInt          n_ISForFaces,n_ISForEdges;
1186   /* near null space stuff */
1187   MatNullSpace      nearnullsp;
1188   const Vec         *nearnullvecs;
1189   Vec               *localnearnullsp;
1190   PetscBool         nnsp_has_cnst;
1191   PetscInt          nnsp_size;
1192   PetscScalar       *array;
1193   /* BLAS integers */
1194   PetscBLASInt      lwork,lierr;
1195   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1196   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1197   /* LAPACK working arrays for SVD or POD */
1198   PetscBool         skip_lapack;
1199   PetscScalar       *work;
1200   PetscReal         *singular_vals;
1201 #if defined(PETSC_USE_COMPLEX)
1202   PetscReal         *rwork;
1203 #endif
1204 #if defined(PETSC_MISSING_LAPACK_GESVD)
1205   PetscBLASInt      Blas_one_2=1;
1206   PetscScalar       *temp_basis,*correlation_mat;
1207 #else
1208   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1209   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1210 #endif
1211   /* change of basis */
1212   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1213   PetscBool         boolforchange,qr_needed;
1214   PetscBT           touched,change_basis,qr_needed_idx;
1215   /* auxiliary stuff */
1216   PetscInt          *nnz,*is_indices,*local_to_B;
1217   /* some quantities */
1218   PetscInt          n_vertices,total_primal_vertices;
1219   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1220 
1221 
1222   PetscFunctionBegin;
1223   /* Get index sets for faces, edges and vertices from graph */
1224   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1225     pcbddc->use_vertices = PETSC_TRUE;
1226   }
1227   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1228   /* HACK: provide functions to set change of basis */
1229   if (!ISForVertices && pcbddc->NullSpace) {
1230     pcbddc->use_change_of_basis = PETSC_TRUE;
1231     pcbddc->use_change_on_faces = PETSC_FALSE;
1232   }
1233   /* print some info */
1234   if (pcbddc->dbg_flag) {
1235     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1236     i = 0;
1237     if (ISForVertices) {
1238       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1239     }
1240     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1241     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1242     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1243     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1244   }
1245   /* check if near null space is attached to global mat */
1246   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1247   if (nearnullsp) {
1248     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1249   } else { /* if near null space is not provided BDDC uses constants by default */
1250     nnsp_size = 0;
1251     nnsp_has_cnst = PETSC_TRUE;
1252   }
1253   /* get max number of constraints on a single cc */
1254   max_constraints = nnsp_size;
1255   if (nnsp_has_cnst) max_constraints++;
1256 
1257   /*
1258        Evaluate maximum storage size needed by the procedure
1259        - temp_indices will contain start index of each constraint stored as follows
1260        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1261        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1262        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1263                                                                                                                                                          */
1264   total_counts = n_ISForFaces+n_ISForEdges;
1265   total_counts *= max_constraints;
1266   n_vertices = 0;
1267   if (ISForVertices) {
1268     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1269   }
1270   total_counts += n_vertices;
1271   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1272   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1273   total_counts = 0;
1274   max_size_of_constraint = 0;
1275   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1276     if (i<n_ISForEdges) {
1277       used_IS = &ISForEdges[i];
1278     } else {
1279       used_IS = &ISForFaces[i-n_ISForEdges];
1280     }
1281     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1282     total_counts += j;
1283     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1284   }
1285   total_counts *= max_constraints;
1286   total_counts += n_vertices;
1287   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1288   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1289   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1290   /* local to boundary numbering */
1291   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1292   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1293   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1294   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1295   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1296   /* get local part of global near null space vectors */
1297   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1298   for (k=0;k<nnsp_size;k++) {
1299     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1300     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1301     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302   }
1303 
1304   /* whether or not to skip lapack calls */
1305   skip_lapack = PETSC_TRUE;
1306   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1307 
1308   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1309   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1310     PetscScalar temp_work;
1311 #if defined(PETSC_MISSING_LAPACK_GESVD)
1312     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1313     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1314     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1315     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1316 #if defined(PETSC_USE_COMPLEX)
1317     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1318 #endif
1319     /* now we evaluate the optimal workspace using query with lwork=-1 */
1320     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1321     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1322     lwork = -1;
1323     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1324 #if !defined(PETSC_USE_COMPLEX)
1325     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1326 #else
1327     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1328 #endif
1329     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1330     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1331 #else /* on missing GESVD */
1332     /* SVD */
1333     PetscInt max_n,min_n;
1334     max_n = max_size_of_constraint;
1335     min_n = max_constraints;
1336     if (max_size_of_constraint < max_constraints) {
1337       min_n = max_size_of_constraint;
1338       max_n = max_constraints;
1339     }
1340     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1341 #if defined(PETSC_USE_COMPLEX)
1342     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1343 #endif
1344     /* now we evaluate the optimal workspace using query with lwork=-1 */
1345     lwork = -1;
1346     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1347     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1348     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1349     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1350 #if !defined(PETSC_USE_COMPLEX)
1351     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1352 #else
1353     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1354 #endif
1355     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1356     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1357 #endif /* on missing GESVD */
1358     /* Allocate optimal workspace */
1359     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1360     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1361   }
1362   /* Now we can loop on constraining sets */
1363   total_counts = 0;
1364   temp_indices[0] = 0;
1365   /* vertices */
1366   if (ISForVertices) {
1367     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1368     if (nnsp_has_cnst) { /* consider all vertices */
1369       for (i=0;i<n_vertices;i++) {
1370         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1371         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1372         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1373         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1374         total_counts++;
1375       }
1376     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1377       PetscBool used_vertex;
1378       for (i=0;i<n_vertices;i++) {
1379         used_vertex = PETSC_FALSE;
1380         k = 0;
1381         while (!used_vertex && k<nnsp_size) {
1382           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1383           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1384             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1385             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1386             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1387             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1388             total_counts++;
1389             used_vertex = PETSC_TRUE;
1390           }
1391           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1392           k++;
1393         }
1394       }
1395     }
1396     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1397     n_vertices = total_counts;
1398   }
1399 
1400   /* edges and faces */
1401   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1402     if (i<n_ISForEdges) {
1403       used_IS = &ISForEdges[i];
1404       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1405     } else {
1406       used_IS = &ISForFaces[i-n_ISForEdges];
1407       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1408     }
1409     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1410     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1411     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1412     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1413     /* change of basis should not be performed on local periodic nodes */
1414     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1415     if (nnsp_has_cnst) {
1416       PetscScalar quad_value;
1417       temp_constraints++;
1418       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1419       for (j=0;j<size_of_constraint;j++) {
1420         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1421         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1422         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1423       }
1424       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1425       if (boolforchange) {
1426         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1427       }
1428       total_counts++;
1429     }
1430     for (k=0;k<nnsp_size;k++) {
1431       PetscReal real_value;
1432       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1433       for (j=0;j<size_of_constraint;j++) {
1434         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1435         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1436         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1437       }
1438       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1439       /* check if array is null on the connected component */
1440       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1441       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1442       if (real_value > 0.0) { /* keep indices and values */
1443         temp_constraints++;
1444         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1445         if (boolforchange) {
1446           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1447         }
1448         total_counts++;
1449       }
1450     }
1451     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1452     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1453     if (!pcbddc->use_nnsp_true && temp_constraints) {
1454       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1455 
1456 #if defined(PETSC_MISSING_LAPACK_GESVD)
1457       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1458          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1459          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1460             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1461             from that computed using LAPACKgesvd
1462          -> This is due to a different computation of eigenvectors in LAPACKheev
1463          -> The quality of the POD-computed basis will be the same */
1464       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1465       /* Store upper triangular part of correlation matrix */
1466       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1467       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1468       for (j=0;j<temp_constraints;j++) {
1469         for (k=0;k<j+1;k++) {
1470           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1471         }
1472       }
1473       /* compute eigenvalues and eigenvectors of correlation matrix */
1474       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1475       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1476 #if !defined(PETSC_USE_COMPLEX)
1477       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1478 #else
1479       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1480 #endif
1481       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1482       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1483       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1484       j=0;
1485       while (j < temp_constraints && singular_vals[j] < tol) j++;
1486       total_counts=total_counts-j;
1487       /* scale and copy POD basis into used quadrature memory */
1488       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1489       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1490       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1491       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1492       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1493       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1494       if (j<temp_constraints) {
1495         PetscInt ii;
1496         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1497         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1498         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1499         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1500         for (k=0;k<temp_constraints-j;k++) {
1501           for (ii=0;ii<size_of_constraint;ii++) {
1502             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1503           }
1504         }
1505       }
1506 #else  /* on missing GESVD */
1507       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1508       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1509       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1510       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1511 #if !defined(PETSC_USE_COMPLEX)
1512       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1513 #else
1514       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1515 #endif
1516       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1517       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1518       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1519       k = temp_constraints;
1520       if (k > size_of_constraint) k = size_of_constraint;
1521       j = 0;
1522       while (j < k && singular_vals[k-j-1] < tol) j++;
1523       total_counts = total_counts-temp_constraints+k-j;
1524 #endif /* on missing GESVD */
1525     }
1526   }
1527   /* free index sets of faces, edges and vertices */
1528   for (i=0;i<n_ISForFaces;i++) {
1529     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1530   }
1531   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1532   for (i=0;i<n_ISForEdges;i++) {
1533     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1534   }
1535   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1536   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1537 
1538   /* free workspace */
1539   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1540     ierr = PetscFree(work);CHKERRQ(ierr);
1541 #if defined(PETSC_USE_COMPLEX)
1542     ierr = PetscFree(rwork);CHKERRQ(ierr);
1543 #endif
1544     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1545 #if defined(PETSC_MISSING_LAPACK_GESVD)
1546     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1547     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1548 #endif
1549   }
1550   for (k=0;k<nnsp_size;k++) {
1551     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1552   }
1553   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1554 
1555   /* set quantities in pcbddc data structure */
1556   /* n_vertices defines the number of subdomain corners in the primal space */
1557   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1558   pcbddc->local_primal_size = total_counts;
1559   pcbddc->n_vertices = n_vertices;
1560   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1561 
1562   /* Create constraint matrix */
1563   /* The constraint matrix is used to compute the l2g map of primal dofs */
1564   /* so we need to set it up properly either with or without change of basis */
1565   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1566   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1567   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1568   /* array to compute a local numbering of constraints : vertices first then constraints */
1569   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1570   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1571   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1572   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1573   /* auxiliary stuff for basis change */
1574   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1575   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1576 
1577   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1578   total_primal_vertices=0;
1579   for (i=0;i<pcbddc->local_primal_size;i++) {
1580     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1581     if (size_of_constraint == 1) {
1582       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1583       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1584       aux_primal_minloc[total_primal_vertices]=0;
1585       total_primal_vertices++;
1586     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1587       PetscInt min_loc,min_index;
1588       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1589       /* find first untouched local node */
1590       k = 0;
1591       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1592       min_index = global_indices[k];
1593       min_loc = k;
1594       /* search the minimum among global nodes already untouched on the cc */
1595       for (k=1;k<size_of_constraint;k++) {
1596         /* there can be more than one constraint on a single connected component */
1597         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1598           min_index = global_indices[k];
1599           min_loc = k;
1600         }
1601       }
1602       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1603       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1604       aux_primal_minloc[total_primal_vertices]=min_loc;
1605       total_primal_vertices++;
1606     }
1607   }
1608   /* determine if a QR strategy is needed for change of basis */
1609   qr_needed = PETSC_FALSE;
1610   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1611   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1612     if (PetscBTLookup(change_basis,i)) {
1613       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1614       j = 0;
1615       for (k=0;k<size_of_constraint;k++) {
1616         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1617           j++;
1618         }
1619       }
1620       /* found more than one primal dof on the cc */
1621       if (j > 1) {
1622         PetscBTSet(qr_needed_idx,i);
1623         qr_needed = PETSC_TRUE;
1624       }
1625     }
1626   }
1627   /* free workspace */
1628   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1629   /* permute indices in order to have a sorted set of vertices */
1630   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1631 
1632   /* nonzero structure of constraint matrix */
1633   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1634   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1635   j=total_primal_vertices;
1636   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1637     if (!PetscBTLookup(change_basis,i)) {
1638       nnz[j]=temp_indices[i+1]-temp_indices[i];
1639       j++;
1640     }
1641   }
1642   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1643   ierr = PetscFree(nnz);CHKERRQ(ierr);
1644   /* set values in constraint matrix */
1645   for (i=0;i<total_primal_vertices;i++) {
1646     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1647   }
1648   total_counts = total_primal_vertices;
1649   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1650     if (!PetscBTLookup(change_basis,i)) {
1651       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1652       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1653       total_counts++;
1654     }
1655   }
1656   /* assembling */
1657   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1658   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   /*
1660   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1661   */
1662   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1663   if (pcbddc->use_change_of_basis) {
1664     /* dual and primal dofs on a single cc */
1665     PetscInt     dual_dofs,primal_dofs;
1666     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1667     PetscInt     primal_counter;
1668     /* working stuff for GEQRF */
1669     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1670     PetscBLASInt lqr_work;
1671     /* working stuff for UNGQR */
1672     PetscScalar  *gqr_work,lgqr_work_t;
1673     PetscBLASInt lgqr_work;
1674     /* working stuff for TRTRS */
1675     PetscScalar  *trs_rhs;
1676     PetscBLASInt Blas_NRHS;
1677     /* pointers for values insertion into change of basis matrix */
1678     PetscInt     *start_rows,*start_cols;
1679     PetscScalar  *start_vals;
1680     /* working stuff for values insertion */
1681     PetscBT      is_primal;
1682 
1683     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1684     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1685     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1686     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1687     /* work arrays */
1688     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1689     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1690     /* nonzeros per row */
1691     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1692       if (PetscBTLookup(change_basis,i)) {
1693         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1694         if (PetscBTLookup(qr_needed_idx,i)) {
1695           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1696         } else {
1697           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1698           /* get local primal index on the cc */
1699           j = 0;
1700           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1701           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1702         }
1703       }
1704     }
1705     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1706     ierr = PetscFree(nnz);CHKERRQ(ierr);
1707     /* Set initial identity in the matrix */
1708     for (i=0;i<pcis->n_B;i++) {
1709       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1710     }
1711 
1712     if (pcbddc->dbg_flag) {
1713       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1714       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1715     }
1716 
1717 
1718     /* Now we loop on the constraints which need a change of basis */
1719     /*
1720        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1721        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1722 
1723        Basic blocks of change of basis matrix T computed
1724 
1725           - Using the following block transformation if there is only a primal dof on the cc
1726             (in the example, primal dof is the last one of the edge in LOCAL ordering
1727              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1728             | 1        0   ...        0     1 |
1729             | 0        1   ...        0     1 |
1730             |              ...                |
1731             | 0        ...            1     1 |
1732             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1733 
1734           - via QR decomposition of constraints otherwise
1735     */
1736     if (qr_needed) {
1737       /* space to store Q */
1738       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1739       /* first we issue queries for optimal work */
1740       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1741       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1742       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1743       lqr_work = -1;
1744       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1745       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1746       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1747       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1748       lgqr_work = -1;
1749       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1750       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1751       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1752       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1753       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1754       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1755       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1756       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1757       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1758       /* array to store scaling factors for reflectors */
1759       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1760       /* array to store rhs and solution of triangular solver */
1761       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1762       /* allocating workspace for check */
1763       if (pcbddc->dbg_flag) {
1764         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1765       }
1766     }
1767     /* array to store whether a node is primal or not */
1768     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1769     for (i=0;i<total_primal_vertices;i++) {
1770       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1771     }
1772 
1773     /* loop on constraints and see whether or not they need a change of basis and compute it */
1774     /* -> using implicit ordering contained in temp_indices data */
1775     total_counts = pcbddc->n_vertices;
1776     primal_counter = total_counts;
1777     while (total_counts<pcbddc->local_primal_size) {
1778       primal_dofs = 1;
1779       if (PetscBTLookup(change_basis,total_counts)) {
1780         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1781         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1782           primal_dofs++;
1783         }
1784         /* get constraint info */
1785         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1786         dual_dofs = size_of_constraint-primal_dofs;
1787 
1788         if (pcbddc->dbg_flag) {
1789           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1790         }
1791 
1792         if (primal_dofs > 1) { /* QR */
1793 
1794           /* copy quadrature constraints for change of basis check */
1795           if (pcbddc->dbg_flag) {
1796             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1797           }
1798           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1799           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1800 
1801           /* compute QR decomposition of constraints */
1802           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1803           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1804           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1805           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1806           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1807           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1808           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1809 
1810           /* explictly compute R^-T */
1811           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1812           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1813           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1814           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1815           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1816           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1817           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1818           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1819           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1820           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1821 
1822           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1823           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1824           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1825           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1826           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1827           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1828           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1829           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1830           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1831 
1832           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1833              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1834              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1835           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1836           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1837           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1838           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1839           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1840           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1841           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1842           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1843           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1844           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1845 
1846           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1847           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1848           /* insert cols for primal dofs */
1849           for (j=0;j<primal_dofs;j++) {
1850             start_vals = &qr_basis[j*size_of_constraint];
1851             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1852             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1853           }
1854           /* insert cols for dual dofs */
1855           for (j=0,k=0;j<dual_dofs;k++) {
1856             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1857               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1858               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1859               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1860               j++;
1861             }
1862           }
1863 
1864           /* check change of basis */
1865           if (pcbddc->dbg_flag) {
1866             PetscInt   ii,jj;
1867             PetscBool valid_qr=PETSC_TRUE;
1868             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1869             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1870             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1871             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1872             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1873             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1874             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1875             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1876             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1877             for (jj=0;jj<size_of_constraint;jj++) {
1878               for (ii=0;ii<primal_dofs;ii++) {
1879                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1880                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1881               }
1882             }
1883             if (!valid_qr) {
1884               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1885               for (jj=0;jj<size_of_constraint;jj++) {
1886                 for (ii=0;ii<primal_dofs;ii++) {
1887                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1888                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1889                   }
1890                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1891                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1892                   }
1893                 }
1894               }
1895             } else {
1896               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1897             }
1898           }
1899         } else { /* simple transformation block */
1900           PetscInt row,col;
1901           PetscScalar val;
1902           for (j=0;j<size_of_constraint;j++) {
1903             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1904             if (!PetscBTLookup(is_primal,row)) {
1905               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1906               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1907               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1908             } else {
1909               for (k=0;k<size_of_constraint;k++) {
1910                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1911                 if (row != col) {
1912                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1913                 } else {
1914                   val = 1.0;
1915                 }
1916                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1917               }
1918             }
1919           }
1920         }
1921         /* increment primal counter */
1922         primal_counter += primal_dofs;
1923       } else {
1924         if (pcbddc->dbg_flag) {
1925           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1926         }
1927       }
1928       /* increment constraint counter total_counts */
1929       total_counts += primal_dofs;
1930     }
1931 
1932     /* free workspace */
1933     if (qr_needed) {
1934       if (pcbddc->dbg_flag) {
1935         ierr = PetscFree(work);CHKERRQ(ierr);
1936       }
1937       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1938       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1939       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1940       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1941       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1942     }
1943     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1944     /* assembling */
1945     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1946     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1947     /*
1948     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1949     */
1950   }
1951 
1952   /* flush dbg viewer */
1953   if (pcbddc->dbg_flag) {
1954     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1955   }
1956 
1957   /* free workspace */
1958   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1959   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1960   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1961   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1962   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1963   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1964   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1965   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1966   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1967   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1973 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1974 {
1975   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1976   PC_IS       *pcis = (PC_IS*)pc->data;
1977   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1978   PetscInt    bs,ierr,i,vertex_size;
1979   PetscViewer viewer=pcbddc->dbg_viewer;
1980 
1981   PetscFunctionBegin;
1982   /* Init local Graph struct */
1983   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1984 
1985   /* Check validity of the csr graph passed in by the user */
1986   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1987     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1988   }
1989 
1990   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1991   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1992     Mat mat_adj;
1993     const PetscInt *xadj,*adjncy;
1994     PetscBool flg_row=PETSC_TRUE;
1995 
1996     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1997     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1998     if (!flg_row) {
1999       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2000     }
2001     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2002     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2003     if (!flg_row) {
2004       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2005     }
2006     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2007   }
2008 
2009   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2010   vertex_size = 1;
2011   if (!pcbddc->user_provided_isfordofs) {
2012     if (!pcbddc->n_ISForDofs) {
2013       IS *custom_ISForDofs;
2014 
2015       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2016       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2017       for (i=0;i<bs;i++) {
2018         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2019       }
2020       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2021       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2022       /* remove my references to IS objects */
2023       for (i=0;i<bs;i++) {
2024         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2025       }
2026       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2027     }
2028   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2029     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2030   }
2031 
2032   /* Setup of Graph */
2033   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2034 
2035   /* Graph's connected components analysis */
2036   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2037 
2038   /* print some info to stdout */
2039   if (pcbddc->dbg_flag) {
2040     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2041   }
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2047 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2048 {
2049   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2050   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2051   PetscErrorCode ierr;
2052 
2053   PetscFunctionBegin;
2054   n = 0;
2055   vertices = 0;
2056   if (pcbddc->ConstraintMatrix) {
2057     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2058     for (i=0;i<local_primal_size;i++) {
2059       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2060       if (size_of_constraint == 1) n++;
2061       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2062     }
2063     if (vertices_idx) {
2064       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2065       n = 0;
2066       for (i=0;i<local_primal_size;i++) {
2067         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2068         if (size_of_constraint == 1) {
2069           vertices[n++]=row_cmat_indices[0];
2070         }
2071         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2072       }
2073     }
2074   }
2075   *n_vertices = n;
2076   if (vertices_idx) *vertices_idx = vertices;
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 #undef __FUNCT__
2081 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2082 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2083 {
2084   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2085   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2086   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2087   PetscBT        touched;
2088   PetscErrorCode ierr;
2089 
2090     /* This function assumes that the number of local constraints per connected component
2091        is not greater than the number of nodes defined for the connected component
2092        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2093   PetscFunctionBegin;
2094   n = 0;
2095   constraints_index = 0;
2096   if (pcbddc->ConstraintMatrix) {
2097     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2098     max_size_of_constraint = 0;
2099     for (i=0;i<local_primal_size;i++) {
2100       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2101       if (size_of_constraint > 1) {
2102         n++;
2103       }
2104       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2105       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2106     }
2107     if (constraints_idx) {
2108       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2109       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2110       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2111       n = 0;
2112       for (i=0;i<local_primal_size;i++) {
2113         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2114         if (size_of_constraint > 1) {
2115           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2116           /* find first untouched local node */
2117           j = 0;
2118           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2119           min_index = row_cmat_global_indices[j];
2120           min_loc = j;
2121           /* search the minimum among nodes not yet touched on the connected component
2122              since there can be more than one constraint on a single cc */
2123           for (j=1;j<size_of_constraint;j++) {
2124             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2125               min_index = row_cmat_global_indices[j];
2126               min_loc = j;
2127             }
2128           }
2129           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2130           constraints_index[n++] = row_cmat_indices[min_loc];
2131         }
2132         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2133       }
2134       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2135       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2136     }
2137   }
2138   *n_constraints = n;
2139   if (constraints_idx) *constraints_idx = constraints_index;
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 /* the next two functions has been adapted from pcis.c */
2144 #undef __FUNCT__
2145 #define __FUNCT__ "PCBDDCApplySchur"
2146 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2147 {
2148   PetscErrorCode ierr;
2149   PC_IS          *pcis = (PC_IS*)(pc->data);
2150 
2151   PetscFunctionBegin;
2152   if (!vec2_B) { vec2_B = v; }
2153   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2154   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2155   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2156   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2157   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 #undef __FUNCT__
2162 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2163 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2164 {
2165   PetscErrorCode ierr;
2166   PC_IS          *pcis = (PC_IS*)(pc->data);
2167 
2168   PetscFunctionBegin;
2169   if (!vec2_B) { vec2_B = v; }
2170   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2171   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2172   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2173   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2174   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 #undef __FUNCT__
2179 #define __FUNCT__ "PCBDDCSubsetNumbering"
2180 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2181 {
2182   Vec            local_vec,global_vec;
2183   IS             seqis,paris;
2184   VecScatter     scatter_ctx;
2185   PetscScalar    *array;
2186   PetscInt       *temp_global_dofs;
2187   PetscScalar    globalsum;
2188   PetscInt       i,j,s;
2189   PetscInt       nlocals,first_index,old_index,max_local;
2190   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2191   PetscMPIInt    *dof_sizes,*dof_displs;
2192   PetscBool      first_found;
2193   PetscErrorCode ierr;
2194 
2195   PetscFunctionBegin;
2196   /* mpi buffers */
2197   MPI_Comm_size(comm,&size_prec_comm);
2198   MPI_Comm_rank(comm,&rank_prec_comm);
2199   j = ( !rank_prec_comm ? size_prec_comm : 0);
2200   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2201   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2202   /* get maximum size of subset */
2203   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2204   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2205   max_local = 0;
2206   if (n_local_dofs) {
2207     max_local = temp_global_dofs[0];
2208     for (i=1;i<n_local_dofs;i++) {
2209       if (max_local < temp_global_dofs[i] ) {
2210         max_local = temp_global_dofs[i];
2211       }
2212     }
2213   }
2214   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2215   max_global++;
2216   max_local = 0;
2217   if (n_local_dofs) {
2218     max_local = local_dofs[0];
2219     for (i=1;i<n_local_dofs;i++) {
2220       if (max_local < local_dofs[i] ) {
2221         max_local = local_dofs[i];
2222       }
2223     }
2224   }
2225   max_local++;
2226   /* allocate workspace */
2227   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2228   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2229   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2230   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2231   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2232   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2233   /* create scatter */
2234   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2235   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2236   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2237   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2238   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2239   /* init array */
2240   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2241   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2242   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2243   if (local_dofs_mult) {
2244     for (i=0;i<n_local_dofs;i++) {
2245       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2246     }
2247   } else {
2248     for (i=0;i<n_local_dofs;i++) {
2249       array[local_dofs[i]]=1.0;
2250     }
2251   }
2252   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2253   /* scatter into global vec and get total number of global dofs */
2254   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2255   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2256   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2257   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2258   /* Fill global_vec with cumulative function for global numbering */
2259   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2260   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2261   nlocals = 0;
2262   first_index = -1;
2263   first_found = PETSC_FALSE;
2264   for (i=0;i<s;i++) {
2265     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2266       first_found = PETSC_TRUE;
2267       first_index = i;
2268     }
2269     nlocals += (PetscInt)PetscRealPart(array[i]);
2270   }
2271   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2272   if (!rank_prec_comm) {
2273     dof_displs[0]=0;
2274     for (i=1;i<size_prec_comm;i++) {
2275       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2276     }
2277   }
2278   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2279   if (first_found) {
2280     array[first_index] += (PetscScalar)nlocals;
2281     old_index = first_index;
2282     for (i=first_index+1;i<s;i++) {
2283       if (PetscRealPart(array[i]) > 0.0) {
2284         array[i] += array[old_index];
2285         old_index = i;
2286       }
2287     }
2288   }
2289   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2290   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2291   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2292   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2293   /* get global ordering of local dofs */
2294   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2295   if (local_dofs_mult) {
2296     for (i=0;i<n_local_dofs;i++) {
2297       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2298     }
2299   } else {
2300     for (i=0;i<n_local_dofs;i++) {
2301       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2302     }
2303   }
2304   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2305   /* free workspace */
2306   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2307   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2308   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2309   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2310   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2311   /* return pointer to global ordering of local dofs */
2312   *global_numbering_subset = temp_global_dofs;
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2318 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2319 {
2320   PetscInt       i,j;
2321   PetscScalar    *alphas;
2322   PetscErrorCode ierr;
2323 
2324   PetscFunctionBegin;
2325   /* this implements stabilized Gram-Schmidt */
2326   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2327   for (i=0;i<n;i++) {
2328     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2329     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2330     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2331   }
2332   ierr = PetscFree(alphas);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatConvert_IS_AIJ"
2339 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2340 {
2341   Mat new_mat;
2342   Mat_IS *matis = (Mat_IS*)(mat->data);
2343   /* info on mat */
2344   PetscInt bs,rows,cols;
2345   PetscInt lrows,lcols;
2346   PetscInt local_rows,local_cols;
2347   PetscMPIInt nsubdomains;
2348   /* preallocation */
2349   Vec vec_dnz,vec_onz;
2350   PetscScalar *my_dnz,*my_onz,*array;
2351   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2352   PetscInt  index_row,index_col,owner;
2353   PetscInt  *local_indices,*global_indices;
2354   /* work */
2355   PetscInt i,j;
2356   PetscErrorCode ierr;
2357 
2358   PetscFunctionBegin;
2359   /* CHECKS */
2360   /* get info from mat */
2361   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2362   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2363   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2364   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2365 
2366   /* MAT_INITIAL_MATRIX */
2367   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2368   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2369   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2370   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2371   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2372   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2373 
2374   /*
2375     preallocation
2376   */
2377 
2378   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2379   /*
2380      Some vectors are needed to sum up properly on shared interface dofs.
2381      Note that preallocation is not exact, since it overestimates nonzeros
2382   */
2383 /*
2384   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2385   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2386   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2387   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2388 */
2389   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2390   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2391   /* All processes need to compute entire row ownership */
2392   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2393   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2394   for (i=0;i<nsubdomains;i++) {
2395     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2396       row_ownership[j]=i;
2397     }
2398   }
2399   /* map indices of local mat to global values */
2400   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2401   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2402   for (i=0;i<local_rows;i++) local_indices[i]=i;
2403   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2404   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2405 
2406   /*
2407      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2408      then, they will be summed up properly. This way, preallocation is always sufficient
2409   */
2410   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2411   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2412   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2413   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2414   for (i=0;i<local_rows;i++) {
2415     index_row = global_indices[i];
2416     for (j=i;j<local_rows;j++) {
2417       owner = row_ownership[index_row];
2418       index_col = global_indices[j];
2419       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2420         my_dnz[i] += 1.0;
2421       } else { /* offdiag block */
2422         my_onz[i] += 1.0;
2423       }
2424       /* same as before, interchanging rows and cols */
2425       if (i != j) {
2426         owner = row_ownership[index_col];
2427         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2428           my_dnz[j] += 1.0;
2429         } else {
2430           my_onz[j] += 1.0;
2431         }
2432       }
2433     }
2434   }
2435   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2436   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2437   if (local_rows) { /* multilevel guard */
2438     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2439     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2440   }
2441   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2442   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2443   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2444   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2445   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2446   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2447   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2448 
2449   /* set computed preallocation in dnz and onz */
2450   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2451   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2452   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2453   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2454   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2455   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2456   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2457   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2458 
2459   /* Resize preallocation if overestimated */
2460   for (i=0;i<lrows;i++) {
2461     dnz[i] = PetscMin(dnz[i],lcols);
2462     onz[i] = PetscMin(onz[i],cols-lcols);
2463   }
2464   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2465   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2466 
2467   /*
2468     Set values. Very Basic.
2469   */
2470   for (i=0;i<local_rows;i++) {
2471     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2472     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2473     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2474     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2475     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2476   }
2477   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2478   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2479   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2480 
2481   /* get back mat */
2482   *M = new_mat;
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatISSubassemble_Private"
2488 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2489 {
2490   Mat             subdomain_adj;
2491   IS              new_ranks,ranks_send_to;
2492   MatPartitioning partitioner;
2493   Mat_IS          *matis;
2494   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2495   PetscInt        prank;
2496   PetscMPIInt     size,rank,color;
2497   PetscInt        *xadj,*adjncy,*oldranks;
2498   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2499   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2500   PetscErrorCode  ierr;
2501   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2502   PetscSubcomm    subcomm;
2503 
2504   PetscFunctionBegin;
2505   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2506   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2507   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2508 
2509   /* Get info on mapping */
2510   matis = (Mat_IS*)(mat->data);
2511   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2512   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2513 
2514   /* build local CSR graph of subdomains' connectivity */
2515   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2516   xadj[0] = 0;
2517   xadj[1] = PetscMax(n_neighs-1,0);
2518   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2519   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2520 
2521   if (threshold) {
2522     PetscInt* count,min_threshold;
2523     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2524     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2525     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2526       for (j=0;j<n_shared[i];j++) {
2527         count[shared[i][j]] += 1;
2528       }
2529     }
2530     /* adapt threshold since we dont want to lose significant connections */
2531     min_threshold = n_neighs;
2532     for (i=1;i<n_neighs;i++) {
2533       for (j=0;j<n_shared[i];j++) {
2534         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2535       }
2536     }
2537     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2538     threshold = PetscMax(min_threshold+1,threshold);
2539     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2540     xadj[1] = 0;
2541     for (i=1;i<n_neighs;i++) {
2542       for (j=0;j<n_shared[i];j++) {
2543         if (count[shared[i][j]] < threshold) {
2544           adjncy[xadj[1]] = neighs[i];
2545           adjncy_wgt[xadj[1]] = n_shared[i];
2546           xadj[1]++;
2547           break;
2548         }
2549       }
2550     }
2551     ierr = PetscFree(count);CHKERRQ(ierr);
2552   } else {
2553     if (xadj[1]) {
2554       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2555       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2556     }
2557   }
2558   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2559   if (use_square) {
2560     for (i=0;i<xadj[1];i++) {
2561       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2562     }
2563   }
2564   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2565 
2566   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2567 
2568   /*
2569     Restrict work on active processes only.
2570   */
2571   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2572   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2573   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2574   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2575   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2576   if (color) {
2577     ierr = PetscFree(xadj);CHKERRQ(ierr);
2578     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2579     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2580   } else {
2581     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2582     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2583     prank = rank;
2584     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2585     for (i=0;i<size;i++) {
2586       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2587     }
2588     for (i=0;i<xadj[1];i++) {
2589       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2590     }
2591     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2592     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2593     n_subdomains = (PetscInt)size;
2594     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2595 
2596     /* Partition */
2597     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2598     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2599     if (use_vwgt) {
2600       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2601       v_wgt[0] = local_size;
2602       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2603     }
2604     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2605     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2606     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2607     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2608     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2609 
2610     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2611     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2612     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2613     /* clean up */
2614     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2615     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2616     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2617     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2618   }
2619   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2620 
2621   /* assemble parallel IS for sends */
2622   i = 1;
2623   if (color) i=0;
2624   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2625   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2626 
2627   /* get back IS */
2628   *is_sends = ranks_send_to;
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2633 
2634 #undef __FUNCT__
2635 #define __FUNCT__ "MatISSubassemble"
2636 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2637 {
2638   Mat                    local_mat,new_mat;
2639   Mat_IS                 *matis;
2640   IS                     is_sends_internal;
2641   PetscInt               rows,cols;
2642   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2643   PetscBool              ismatis,isdense;
2644   ISLocalToGlobalMapping l2gmap;
2645   PetscInt*              l2gmap_indices;
2646   const PetscInt*        is_indices;
2647   MatType                new_local_type;
2648   MatTypePrivate         new_local_type_private;
2649   /* buffers */
2650   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2651   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2652   /* MPI */
2653   MPI_Comm               comm;
2654   PetscMPIInt            n_sends,n_recvs,commsize;
2655   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2656   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2657   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2658   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2659   PetscErrorCode         ierr;
2660 
2661   PetscFunctionBegin;
2662   /* checks */
2663   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2664   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2665   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2666   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2667   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2668   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2669   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2670   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2671   PetscValidLogicalCollectiveInt(mat,bs,0);
2672   /* prepare IS for sending if not provided */
2673   if (!is_sends) {
2674     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2675     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2676   } else {
2677     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2678     is_sends_internal = is_sends;
2679   }
2680 
2681   /* get pointer of MATIS data */
2682   matis = (Mat_IS*)mat->data;
2683 
2684   /* get comm */
2685   comm = PetscObjectComm((PetscObject)mat);
2686 
2687   /* compute number of sends */
2688   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2689   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2690 
2691   /* compute number of receives */
2692   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2693   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2694   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2695   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2696   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2697   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2698   ierr = PetscFree(iflags);CHKERRQ(ierr);
2699 
2700   /* prepare send/receive buffers */
2701   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2702   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2703   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2704   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2705 
2706   /* Get data from local mat */
2707   if (!isdense) {
2708     /* TODO: See below some guidelines on how to prepare the local buffers */
2709     /*
2710        send_buffer_vals should contain the raw values of the local matrix
2711        send_buffer_idxs should contain:
2712        - MatType_PRIVATE type
2713        - PetscInt        size_of_l2gmap
2714        - PetscInt        global_row_indices[size_of_l2gmap]
2715        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2716     */
2717   } else {
2718     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2719     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2720     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2721     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2722     send_buffer_idxs[1] = i;
2723     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2724     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2725     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2726     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2727     for (i=0;i<n_sends;i++) {
2728       ilengths_vals[is_indices[i]] = len*len;
2729       ilengths_idxs[is_indices[i]] = len+2;
2730     }
2731   }
2732   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2733   buf_size_idxs = 0;
2734   buf_size_vals = 0;
2735   for (i=0;i<n_recvs;i++) {
2736     buf_size_idxs += (PetscInt)olengths_idxs[i];
2737     buf_size_vals += (PetscInt)olengths_vals[i];
2738   }
2739   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2740   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2741 
2742   /* get new tags for clean communications */
2743   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2744   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2745 
2746   /* allocate for requests */
2747   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2748   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2749   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2750   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2751 
2752   /* communications */
2753   ptr_idxs = recv_buffer_idxs;
2754   ptr_vals = recv_buffer_vals;
2755   for (i=0;i<n_recvs;i++) {
2756     source_dest = onodes[i];
2757     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2758     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2759     ptr_idxs += olengths_idxs[i];
2760     ptr_vals += olengths_vals[i];
2761   }
2762   for (i=0;i<n_sends;i++) {
2763     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2764     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2765     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2766   }
2767   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2768   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2769 
2770   /* assemble new l2g map */
2771   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2772   ptr_idxs = recv_buffer_idxs;
2773   buf_size_idxs = 0;
2774   for (i=0;i<n_recvs;i++) {
2775     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2776     ptr_idxs += olengths_idxs[i];
2777   }
2778   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2779   ptr_idxs = recv_buffer_idxs;
2780   buf_size_idxs = 0;
2781   for (i=0;i<n_recvs;i++) {
2782     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2783     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2784     ptr_idxs += olengths_idxs[i];
2785   }
2786   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2787   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2788   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2789 
2790   /* infer new local matrix type from received local matrices type */
2791   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2792   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2793   new_local_type_private = MATAIJ_PRIVATE;
2794   new_local_type = MATSEQAIJ;
2795   if (n_recvs) {
2796     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2797     ptr_idxs = recv_buffer_idxs;
2798     for (i=0;i<n_recvs;i++) {
2799       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2800         new_local_type_private = MATAIJ_PRIVATE;
2801         break;
2802       }
2803       ptr_idxs += olengths_idxs[i];
2804     }
2805     switch (new_local_type_private) {
2806       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2807         new_local_type = MATSEQAIJ;
2808         bs = 1;
2809         break;
2810       case MATAIJ_PRIVATE:
2811         new_local_type = MATSEQAIJ;
2812         bs = 1;
2813         break;
2814       case MATBAIJ_PRIVATE:
2815         new_local_type = MATSEQBAIJ;
2816         break;
2817       case MATSBAIJ_PRIVATE:
2818         new_local_type = MATSEQSBAIJ;
2819         break;
2820       default:
2821         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2822         break;
2823     }
2824   }
2825 
2826   /* create MATIS object */
2827   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2828   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2829   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2830   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2831   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2832   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2833 
2834   /* set values */
2835   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2836   ptr_vals = recv_buffer_vals;
2837   ptr_idxs = recv_buffer_idxs;
2838   for (i=0;i<n_recvs;i++) {
2839     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2840       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2841       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2842       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2843       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2844       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2845     }
2846     ptr_idxs += olengths_idxs[i];
2847     ptr_vals += olengths_vals[i];
2848   }
2849   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2850   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2851   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2852   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2853 
2854   { /* check */
2855     Vec       lvec,rvec;
2856     PetscReal infty_error;
2857 
2858     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2859     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2860     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2861     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2862     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2863     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2864     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2865     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2866     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2867   }
2868 
2869   /* free workspace */
2870   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2871   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2872   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2873   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2874   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2875   if (isdense) {
2876     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2877     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2878   } else {
2879     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2880   }
2881   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2882   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2883   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2884   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2885   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2886   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2887   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2888   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2889   ierr = PetscFree(onodes);CHKERRQ(ierr);
2890   /* get back new mat */
2891   *mat_n = new_mat;
2892   PetscFunctionReturn(0);
2893 }
2894 
2895 #undef __FUNCT__
2896 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2897 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2898 {
2899   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2900   PC_IS                  *pcis = (PC_IS*)pc->data;
2901   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2902   MatNullSpace           CoarseNullSpace=NULL;
2903   ISLocalToGlobalMapping coarse_islg;
2904   IS                     coarse_is;
2905   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2906   PetscInt               im_active=-1,active_procs=-1;
2907   PC                     pc_temp;
2908   PCType                 coarse_pc_type;
2909   KSPType                coarse_ksp_type;
2910   PetscBool              multilevel_requested,multilevel_allowed;
2911   PetscBool              setsym,issym,isbddc,isnn;
2912   MatStructure           matstruct;
2913   PetscErrorCode         ierr;
2914 
2915   PetscFunctionBegin;
2916   /* Assign global numbering to coarse dofs */
2917   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2918 
2919   /* infer some info from user */
2920   issym = PETSC_FALSE;
2921   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2922   multilevel_allowed = PETSC_FALSE;
2923   multilevel_requested = PETSC_FALSE;
2924   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2925   if (multilevel_requested) {
2926     /* count "active processes" */
2927     im_active = !!(pcis->n);
2928     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2929     if (active_procs/pcbddc->coarsening_ratio < 2) {
2930       multilevel_allowed = PETSC_FALSE;
2931     } else {
2932       multilevel_allowed = PETSC_TRUE;
2933     }
2934   }
2935 
2936   /* set defaults for coarse KSP and PC */
2937   if (multilevel_allowed) {
2938     if (issym) {
2939       coarse_ksp_type = KSPRICHARDSON;
2940     } else {
2941       coarse_ksp_type = KSPCHEBYSHEV;
2942     }
2943     coarse_pc_type = PCBDDC;
2944   } else {
2945     coarse_ksp_type = KSPPREONLY;
2946     coarse_pc_type = PCREDUNDANT;
2947   }
2948 
2949   /* create the coarse KSP object only once with defaults */
2950   if (!pcbddc->coarse_ksp) {
2951     char prefix[256],str_level[3];
2952     size_t len;
2953     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2954     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2955     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2956     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2957     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2958     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2959     /* prefix */
2960     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2961     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2962     if (!pcbddc->current_level) {
2963       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2964       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2965     } else {
2966       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2967       if (pcbddc->current_level>1) len -= 2;
2968       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
2969       *(prefix+len)='\0';
2970       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
2971       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
2972     }
2973     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
2974   }
2975   /* allow user customization */
2976   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2977   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2978   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2979 
2980   /* get some info after set from options */
2981   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2982   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
2983   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2984   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
2985     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2986     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2987     isbddc = PETSC_FALSE;
2988   }
2989 
2990   /* propagate BDDC info to the next level */
2991   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2992   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2993   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2994 
2995   /* creates temporary MATIS object for coarse matrix */
2996   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2997   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2998   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2999   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3000   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3001   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3002   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3003   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3004   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3005   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3006 
3007   /* assemble coarse matrix */
3008   if (isbddc || isnn) {
3009     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3010   } else {
3011     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3012   }
3013   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3014 
3015   /* create local to global scatters for coarse problem */
3016   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3017   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3018   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3019 
3020   /* propagate symmetry info to coarse matrix */
3021   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3022 
3023   /* Compute coarse null space (special handling by BDDC only) */
3024   if (pcbddc->NullSpace) {
3025     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3026     if (isbddc) {
3027       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3028     } else {
3029       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3030     }
3031   }
3032 
3033   /* set operators */
3034   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3035   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3036 
3037   /* additional KSP customization */
3038   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3039   if (max_it < 5) {
3040     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3041   }
3042   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3043 
3044 
3045   /* print some info if requested */
3046   if (pcbddc->dbg_flag) {
3047     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3048     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3049     if (!multilevel_allowed) {
3050       if (multilevel_requested) {
3051         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3052       } else if (pcbddc->max_levels) {
3053         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3054       }
3055     }
3056     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3057     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3058   }
3059 
3060   /* setup coarse ksp */
3061   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3062   if (pcbddc->dbg_flag) {
3063     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3064     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3065     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3066     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3067   }
3068 
3069   /* Check coarse problem if requested */
3070   if (pcbddc->dbg_flag) {
3071     KSP       check_ksp;
3072     KSPType   check_ksp_type;
3073     PC        check_pc;
3074     Vec       check_vec;
3075     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3076     PetscInt  its;
3077     PetscBool ispreonly,compute;
3078 
3079     /* Create ksp object suitable for estimation of extreme eigenvalues */
3080     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3081     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3082     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3083     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3084     if (ispreonly) {
3085       check_ksp_type = KSPPREONLY;
3086       compute = PETSC_FALSE;
3087     } else {
3088       if (issym) check_ksp_type = KSPCG;
3089       else check_ksp_type = KSPGMRES;
3090       compute = PETSC_TRUE;
3091     }
3092     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3093     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3094     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3095     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3096     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3097     /* create random vec */
3098     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3099     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3100     if (CoarseNullSpace) {
3101       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3102     }
3103     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3104     /* solve coarse problem */
3105     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3106     if (CoarseNullSpace) {
3107       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3108     }
3109     /* check coarse problem residual error */
3110     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3111     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3112     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3113     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3114     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3115     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3116     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3117     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3118     /* get eigenvalue estimation if preonly has not been requested */
3119     if (!ispreonly) {
3120       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3121       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3122       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3123     }
3124     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3125     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3126   }
3127   /* free memory */
3128   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3129   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 #undef __FUNCT__
3134 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3135 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3136 {
3137   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3138   PC_IS*         pcis = (PC_IS*)pc->data;
3139   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3140   PetscInt       i,j,coarse_size;
3141   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3142   PetscErrorCode ierr;
3143 
3144   PetscFunctionBegin;
3145   /* get indices in local ordering for vertices and constraints */
3146   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3147   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3148   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3149   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3150   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3151   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3152   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3153 
3154   /* Compute global number of coarse dofs */
3155   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3156 
3157   /* check numbering */
3158   if (pcbddc->dbg_flag) {
3159     PetscScalar coarsesum,*array;
3160 
3161     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3162     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3163     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3164     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3165     for (i=0;i<pcbddc->local_primal_size;i++) {
3166       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3167     }
3168     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3169     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3170     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3171     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3172     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3173     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3174     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3175     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3176     for (i=0;i<pcis->n;i++) {
3177       if (array[i] == 1.0) {
3178         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3179       }
3180     }
3181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3182     for (i=0;i<pcis->n;i++) {
3183       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3184     }
3185     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3186     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3187     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3188     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3189     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3190     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3191     if (pcbddc->dbg_flag > 1) {
3192       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3193       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3194       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3195       for (i=0;i<pcbddc->local_primal_size;i++) {
3196         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3197       }
3198       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3199     }
3200     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3201   }
3202   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3203   /* get back data */
3204   *coarse_size_n = coarse_size;
3205   *local_primal_indices_n = local_primal_indices;
3206   PetscFunctionReturn(0);
3207 }
3208 
3209