xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision c0553b1f7b827a67e4f25d96208a74074e1d61ba)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
166   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183 
184   /* Allocating some extra storage just to be safe */
185   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
186   for (i=0;i<pcis->n;i++) auxindices[i]=i;
187 
188   /* vertices in boundary numbering */
189   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
191   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
193   if (i != n_vertices) {
194     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
195   }
196 
197   /* Precompute stuffs needed for preprocessing and application of BDDC*/
198   if (n_constraints) {
199     /* work vectors */
200     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
201     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
202     /* auxiliary matrices */
203     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
204     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
205     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
206     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
207 
208     /* Extract constraints on R nodes: C_{CR}  */
209     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
210     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
211     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
212 
213     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
214     for (i=0;i<n_constraints;i++) {
215       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
216       /* Get row of constraint matrix in R numbering */
217       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
219       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
221       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
222       /* Solve for row of constraint matrix in R numbering */
223       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
224       /* Set values in local_auxmat2 */
225       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
227       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228     }
229     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
232 
233     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
234     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
235     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
236     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
237     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
238     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
239     ierr = MatSetUp(M1);CHKERRQ(ierr);
240     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
241     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
242     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
243     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
244     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
245     ierr = MatDestroy(&M2);CHKERRQ(ierr);
246     ierr = MatDestroy(&M3);CHKERRQ(ierr);
247     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
248     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
249   }
250 
251   /* Get submatrices from subdomain matrix */
252   if (n_vertices) {
253     PetscInt ibs,mbs;
254     PetscBool issbaij;
255     Mat newmat;
256 
257     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
258     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
259     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
260     if (ibs != mbs) { /* need to convert to SEQAIJ */
261       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
263       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
265       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
266     } else {
267       /* this is safe */
268       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
269       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
270       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
271         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272         /* which of the two approaches is faster? */
273         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
275         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
277         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
278       } else {
279         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
280         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
281       }
282     }
283     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
284     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
285     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
286   }
287 
288   /* Matrix of coarse basis functions (local) */
289   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
290   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
291   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
292   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
293   if (pcbddc->switch_static || pcbddc->dbg_flag) {
294     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
295     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
296     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
297     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
298   }
299 
300   if (pcbddc->dbg_flag) {
301     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
303     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
304   }
305   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
306   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
307 
308   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
309 
310   /* vertices */
311   for (i=0;i<n_vertices;i++) {
312     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
313     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
314     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
315     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
317     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
318     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
319     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
320     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
321     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
322     if (n_constraints) {
323       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
324       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
325       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
326     }
327     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
328     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
329 
330     /* Set values in coarse basis function and subdomain part of coarse_mat */
331     /* coarse basis functions */
332     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
333     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
335     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
337     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
339     if (pcbddc->switch_static || pcbddc->dbg_flag) {
340       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
341       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
342       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345     }
346     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
347     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
349     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     if (n_constraints) {
351       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354     }
355 
356     /* check */
357     if (pcbddc->dbg_flag) {
358       /* assemble subdomain vector on local nodes */
359       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
365       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
366       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
367       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
368       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
370       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       if (n_constraints) {
372         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
373         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
374         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
375       }
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
379       /* check saddle point solution */
380       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
381       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
382       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
383       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
384       /* shift by the identity matrix */
385       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
386       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
387       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   /* constraints */
393   for (i=0;i<n_constraints;i++) {
394     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
395     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
397     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
398     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
399     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
400     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
401     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
402     if (n_vertices) {
403       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
404     }
405     /* Set values in coarse basis function and subdomain part of coarse_mat */
406     /* coarse basis functions */
407     j = i+n_vertices; /* don't touch this! */
408     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
409     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
410     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
411     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     if (pcbddc->switch_static || pcbddc->dbg_flag) {
415       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420     }
421     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
422     if (n_vertices) {
423       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     }
427     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
428     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
429     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
430 
431     if (pcbddc->dbg_flag) {
432       /* assemble subdomain vector on nodes */
433       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
434       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
439       /* assemble subdomain vector of lagrange multipliers */
440       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
441       if (n_vertices) {
442         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
443         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
444         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
445       }
446       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
448       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
452       /* check saddle point solution */
453       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
454       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
455       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
456       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
457       /* shift by the identity matrix */
458       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
459       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
460       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
462     }
463   }
464   /* call assembling routines for local coarse basis */
465   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   if (pcbddc->switch_static || pcbddc->dbg_flag) {
468     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471 
472   /* compute other basis functions for non-symmetric problems */
473   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
474   if (!setsym || (setsym && !issym)) {
475     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
477     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
478     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
481       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
482       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
483       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
484     }
485     for (i=0;i<pcbddc->local_primal_size;i++) {
486       if (n_constraints) {
487         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
488         for (j=0;j<n_constraints;j++) {
489           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
490         }
491         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
492         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
493       }
494       if (i<n_vertices) {
495         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
496         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
497         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
498         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
499         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
500         if (n_constraints) {
501           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
502         }
503       } else {
504         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
505       }
506       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       if (i<n_vertices) {
514         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
515       }
516       if (pcbddc->switch_static || pcbddc->dbg_flag) {
517         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522       }
523 
524       if (pcbddc->dbg_flag) {
525         /* assemble subdomain vector on nodes */
526         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
527         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
529         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         if (i<n_vertices) {
531           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
532         }
533         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
534         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
535         /* assemble subdomain vector of lagrange multipliers */
536         for (j=0;j<pcbddc->local_primal_size;j++) {
537           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
538         }
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         /* check saddle point solution */
542         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
543         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
544         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
545         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
546         /* shift by the identity matrix */
547         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
548         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
549         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
550         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
551       }
552     }
553     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     if (pcbddc->switch_static || pcbddc->dbg_flag) {
556       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     }
559     unsymmetric_check = PETSC_TRUE;
560   } else { /* take references to already computed coarse basis */
561     unsymmetric_check = PETSC_FALSE;
562     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
563     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
564     if (pcbddc->coarse_phi_D) {
565       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
566       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
567     }
568   }
569   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
570   /* Checking coarse_sub_mat and coarse basis functios */
571   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
572   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
573   if (pcbddc->dbg_flag) {
574     Mat         coarse_sub_mat;
575     Mat         AUXMAT,TM1,TM2,TM3,TM4;
576     Mat         coarse_phi_D,coarse_phi_B;
577     Mat         coarse_psi_D,coarse_psi_B;
578     Mat         A_II,A_BB,A_IB,A_BI;
579     MatType     checkmattype=MATSEQAIJ;
580     PetscReal   real_value;
581 
582     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
583     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
584     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
585     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
586     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
587     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
588     if (unsymmetric_check) {
589       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
590       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
591     }
592     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
593 
594     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
596     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
597     if (unsymmetric_check) {
598       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
602       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
608       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
610     } else {
611       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
612       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
613       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
614       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
615       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
616       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
617       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
618       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
619     }
620     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
621     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
622     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
623     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
624     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
625     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
626     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
627     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
628     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
629     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
630     for (i=0;i<pcbddc->local_primal_size;i++) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
632     }
633     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
634     for (i=0;i<pcbddc->local_primal_size;i++) {
635       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
636     }
637     if (unsymmetric_check) {
638       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
639       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
640         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
641       }
642       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
643       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
644         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
645       }
646     }
647     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
648     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
649     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
652     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
653     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
654     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
655     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
656     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
657     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
658     if (unsymmetric_check) {
659       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
660       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
661     }
662     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
663     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
664     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
665     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
666   }
667   /* free memory */
668   if (n_vertices) {
669     ierr = PetscFree(vertices);CHKERRQ(ierr);
670     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
671     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
672     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
673     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
674     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
675   }
676   if (n_constraints) {
677     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
678     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
679     ierr = MatDestroy(&M1);CHKERRQ(ierr);
680     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
681   }
682   ierr = PetscFree(auxindices);CHKERRQ(ierr);
683   /* get back data */
684   *coarse_submat_vals_n = coarse_submat_vals;
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
691 {
692   PC_IS*            pcis = (PC_IS*)(pc->data);
693   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
694   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
695   PetscBool         issbaij,isbaij;
696   /* manage repeated solves */
697   MatReuse          reuse;
698   MatStructure      matstruct;
699   PetscErrorCode    ierr;
700 
701   PetscFunctionBegin;
702   /* get mat flags */
703   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
704   reuse = MAT_INITIAL_MATRIX;
705   if (pc->setupcalled) {
706     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
707     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
708     if (matstruct == SAME_NONZERO_PATTERN) {
709       reuse = MAT_REUSE_MATRIX;
710     } else {
711       reuse = MAT_INITIAL_MATRIX;
712     }
713   }
714   if (reuse == MAT_INITIAL_MATRIX) {
715     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
716     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
717     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
718     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
719     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
720   }
721 
722   /* transform local matrices if needed */
723   if (pcbddc->use_change_of_basis) {
724     Mat         change_mat_all;
725     PetscScalar *row_cmat_values;
726     PetscInt    *row_cmat_indices;
727     PetscInt    *nnz,*is_indices,*temp_indices;
728     PetscInt    i,j,k,n_D,n_B;
729 
730     /* Get Non-overlapping dimensions */
731     n_B = pcis->n_B;
732     n_D = pcis->n-n_B;
733 
734     /* compute nonzero structure of change of basis on all local nodes */
735     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
736     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
738     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     k=1;
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
743       nnz[is_indices[i]]=j;
744       if (k < j) k = j;
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
746     }
747     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     /* assemble change of basis matrix on the whole set of local dofs */
749     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
750     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
751     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
752     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
753     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
754     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
755     for (i=0;i<n_D;i++) {
756       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
757     }
758     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
759     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
760     for (i=0;i<n_B;i++) {
761       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
762       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
763       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
764       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
765     }
766     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
768     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
769     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
770     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
771     if (!issbaij && !isbaij) {
772       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
773     } else {
774       Mat work_mat;
775       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
776       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
777       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
778     }
779     /*
780     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
781     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
782     */
783     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
784     ierr = PetscFree(nnz);CHKERRQ(ierr);
785     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
786   } else {
787     /* without change of basis, the local matrix is unchanged */
788     if (!pcbddc->local_mat) {
789       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
790       pcbddc->local_mat = matis->A;
791     }
792   }
793 
794   /* get submatrices */
795   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
796   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
797   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
798   if (!issbaij) {
799     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
800     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
801   } else {
802     Mat newmat;
803     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
804     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
805     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
806     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
814 {
815   PC_IS*         pcis = (PC_IS*)(pc->data);
816   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
817   IS             is_aux1,is_aux2;
818   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
819   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
820   PetscInt       vbs,bs;
821   PetscBT        bitmask;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   /* Set Non-overlapping dimensions */
826   n_B = pcis->n_B; n_D = pcis->n - n_B;
827   /* get vertex indices from constraint matrix */
828   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
829 
830   /* create auxiliary bitmask */
831   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
832   for (i=0;i<n_vertices;i++) {
833     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
834   }
835 
836   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
837   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
838   for (i=0, n_R=0; i<pcis->n; i++) {
839     if (!PetscBTLookup(bitmask,i)) {
840       idx_R_local[n_R] = i;
841       n_R++;
842     }
843   }
844 
845   /* Block code */
846   vbs = 1;
847   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
848   if (bs>1 && !(n_vertices%bs)) {
849     PetscBool is_blocked = PETSC_TRUE;
850     PetscInt  *vary;
851     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
852     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
853     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
854     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
855     for (i=0; i<n_vertices; i++) {
856       if (vary[i]!=0 && vary[i]!=bs) {
857         is_blocked = PETSC_FALSE;
858         break;
859       }
860     }
861     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
862       vbs = bs;
863       for (i=0;i<n_R/vbs;i++) {
864         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
865       }
866     }
867     ierr = PetscFree(vary);CHKERRQ(ierr);
868   }
869   ierr = PetscFree(vertices);CHKERRQ(ierr);
870   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
871   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
872 
873   /* print some info if requested */
874   if (pcbddc->dbg_flag) {
875     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
876     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
877     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
878     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
879     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
880     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
881     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
882   }
883 
884   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
885   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
886   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
887   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
888   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
889   for (i=0; i<n_D; i++) {
890     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
891   }
892   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
893   for (i=0, j=0; i<n_R; i++) {
894     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
895       aux_array1[j++] = i;
896     }
897   }
898   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
899   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
900   for (i=0, j=0; i<n_B; i++) {
901     if (!PetscBTLookup(bitmask,is_indices[i])) {
902       aux_array2[j++] = i;
903     }
904   }
905   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
906   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
907   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
908   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
909   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
910 
911   if (pcbddc->switch_static || pcbddc->dbg_flag) {
912     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
913     for (i=0, j=0; i<n_R; i++) {
914       if (PetscBTLookup(bitmask,idx_R_local[i])) {
915         aux_array1[j++] = i;
916       }
917     }
918     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
919     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
920     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
921   }
922   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
923   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
931 {
932   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
933   PC_IS          *pcis = (PC_IS*)pc->data;
934   PC             pc_temp;
935   Mat            A_RR;
936   Vec            vec1,vec2,vec3;
937   MatStructure   matstruct;
938   PetscScalar    m_one = -1.0;
939   PetscReal      value;
940   PetscInt       n_D,n_R,ibs,mbs;
941   PetscBool      use_exact,use_exact_reduced,issbaij;
942   PetscErrorCode ierr;
943   /* prefixes stuff */
944   char           dir_prefix[256],neu_prefix[256],str_level[3];
945   size_t         len;
946 
947   PetscFunctionBegin;
948   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
949 
950   /* compute prefixes */
951   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
952   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
953   if (!pcbddc->current_level) {
954     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
955     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
956     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
957     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
958   } else {
959     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
960     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
961     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
962     len -= 15; /* remove "pc_bddc_coarse_" */
963     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
964     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
965     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
966     *(dir_prefix+len)='\0';
967     *(neu_prefix+len)='\0';
968     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
969     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
970     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
971     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
972   }
973 
974   /* DIRICHLET PROBLEM */
975   /* Matrix for Dirichlet problem is pcis->A_II */
976   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
977   if (!pcbddc->ksp_D) { /* create object if not yet build */
978     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
979     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
980     /* default */
981     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
982     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
983     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
984     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
985     if (issbaij) {
986       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
987     } else {
988       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
989     }
990     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
991   }
992   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
993   /* Allow user's customization */
994   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
995   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
996   if (!n_D) {
997     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
998     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
999   }
1000   /* Set Up KSP for Dirichlet problem of BDDC */
1001   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1002   /* set ksp_D into pcis data */
1003   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1004   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1005   pcis->ksp_D = pcbddc->ksp_D;
1006 
1007   /* NEUMANN PROBLEM */
1008   /* Matrix for Neumann problem is A_RR -> we need to create it */
1009   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1010   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1011   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1012   if (ibs != mbs) {
1013     Mat newmat;
1014     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1015     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1016     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1017   } else {
1018     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1019   }
1020   if (!pcbddc->ksp_R) { /* create object if not yet build */
1021     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1022     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1023     /* default */
1024     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1025     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1026     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1027     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1028     if (issbaij) {
1029       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1030     } else {
1031       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1032     }
1033     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1034   }
1035   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1036   /* Allow user's customization */
1037   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1038   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1039   if (!n_R) {
1040     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1041     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1042   }
1043   /* Set Up KSP for Neumann problem of BDDC */
1044   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1045 
1046   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1047 
1048   /* Dirichlet */
1049   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1050   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1051   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1052   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1053   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1054   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1055   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1056   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1057   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1058   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1059   /* need to be adapted? */
1060   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1061   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1062   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1063   /* print info */
1064   if (pcbddc->dbg_flag) {
1065     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1066     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1067     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1068     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1069     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1070   }
1071   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1072     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1073   }
1074 
1075   /* Neumann */
1076   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1077   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1078   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1079   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1080   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1081   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1082   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1083   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1084   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1085   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1086   /* need to be adapted? */
1087   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1088   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1089   /* print info */
1090   if (pcbddc->dbg_flag) {
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1092     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1093   }
1094   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1095     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1096   }
1097   /* free Neumann problem's matrix */
1098   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1104 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc)
1105 {
1106   PetscErrorCode ierr;
1107   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1108 
1109   PetscFunctionBegin;
1110   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1111   if (pcbddc->local_auxmat1) {
1112     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1113     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1120 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1121 {
1122   PetscErrorCode ierr;
1123   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1124   PC_IS*            pcis = (PC_IS*)  (pc->data);
1125   const PetscScalar zero = 0.0;
1126 
1127   PetscFunctionBegin;
1128   /* Application of PSI^T  */
1129   ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1130   if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1131   /* Scatter data of coarse_rhs */
1132   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1133   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134 
1135   /* Local solution on R nodes */
1136   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1137   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139   if (pcbddc->switch_static) {
1140     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   }
1143   ierr = PCBDDCSolveSubstructureCorrection(pc);CHKERRQ(ierr);
1144   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1145   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1146   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1147   if (pcbddc->switch_static) {
1148     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150   }
1151 
1152   /* Coarse solution */
1153   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1154   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1155     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1156   }
1157   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159 
1160   /* Sum contributions from two levels */
1161   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1162   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1168 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1169 {
1170   PetscErrorCode ierr;
1171   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1172 
1173   PetscFunctionBegin;
1174   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1180 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1181 {
1182   PetscErrorCode ierr;
1183   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1184 
1185   PetscFunctionBegin;
1186   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 /* uncomment for testing purposes */
1191 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1192 #undef __FUNCT__
1193 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1194 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1195 {
1196   PetscErrorCode    ierr;
1197   PC_IS*            pcis = (PC_IS*)(pc->data);
1198   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1199   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1200   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1201   MatType           impMatType=MATSEQAIJ;
1202   /* one and zero */
1203   PetscScalar       one=1.0,zero=0.0;
1204   /* space to store constraints and their local indices */
1205   PetscScalar       *temp_quadrature_constraint;
1206   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1207   /* iterators */
1208   PetscInt          i,j,k,total_counts,temp_start_ptr;
1209   /* stuff to store connected components stored in pcbddc->mat_graph */
1210   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1211   PetscInt          n_ISForFaces,n_ISForEdges;
1212   /* near null space stuff */
1213   MatNullSpace      nearnullsp;
1214   const Vec         *nearnullvecs;
1215   Vec               *localnearnullsp;
1216   PetscBool         nnsp_has_cnst;
1217   PetscInt          nnsp_size;
1218   PetscScalar       *array;
1219   /* BLAS integers */
1220   PetscBLASInt      lwork,lierr;
1221   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1222   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1223   /* LAPACK working arrays for SVD or POD */
1224   PetscBool         skip_lapack;
1225   PetscScalar       *work;
1226   PetscReal         *singular_vals;
1227 #if defined(PETSC_USE_COMPLEX)
1228   PetscReal         *rwork;
1229 #endif
1230 #if defined(PETSC_MISSING_LAPACK_GESVD)
1231   PetscBLASInt      Blas_one_2=1;
1232   PetscScalar       *temp_basis,*correlation_mat;
1233 #else
1234   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1235   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1236 #endif
1237   /* change of basis */
1238   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1239   PetscBool         boolforchange,qr_needed;
1240   PetscBT           touched,change_basis,qr_needed_idx;
1241   /* auxiliary stuff */
1242   PetscInt          *nnz,*is_indices,*local_to_B;
1243   /* some quantities */
1244   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1245   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1246 
1247 
1248   PetscFunctionBegin;
1249   /* Get index sets for faces, edges and vertices from graph */
1250   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1251     pcbddc->use_vertices = PETSC_TRUE;
1252   }
1253   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1254   /* HACK: provide functions to set change of basis */
1255   if (!ISForVertices && pcbddc->NullSpace) {
1256     pcbddc->use_change_of_basis = PETSC_TRUE;
1257     pcbddc->use_change_on_faces = PETSC_FALSE;
1258   }
1259   /* print some info */
1260   if (pcbddc->dbg_flag) {
1261     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1262     i = 0;
1263     if (ISForVertices) {
1264       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1265     }
1266     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1267     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1268     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1269     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1270   }
1271   /* check if near null space is attached to global mat */
1272   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1273   if (nearnullsp) {
1274     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1275   } else { /* if near null space is not provided BDDC uses constants by default */
1276     nnsp_size = 0;
1277     nnsp_has_cnst = PETSC_TRUE;
1278   }
1279   /* get max number of constraints on a single cc */
1280   max_constraints = nnsp_size;
1281   if (nnsp_has_cnst) max_constraints++;
1282 
1283   /*
1284        Evaluate maximum storage size needed by the procedure
1285        - temp_indices will contain start index of each constraint stored as follows
1286        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1287        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1288        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1289                                                                                                                                                          */
1290   total_counts = n_ISForFaces+n_ISForEdges;
1291   total_counts *= max_constraints;
1292   n_vertices = 0;
1293   if (ISForVertices) {
1294     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1295   }
1296   total_counts += n_vertices;
1297   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1298   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1299   total_counts = 0;
1300   max_size_of_constraint = 0;
1301   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1302     if (i<n_ISForEdges) {
1303       used_IS = &ISForEdges[i];
1304     } else {
1305       used_IS = &ISForFaces[i-n_ISForEdges];
1306     }
1307     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1308     total_counts += j;
1309     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1310   }
1311   total_counts *= max_constraints;
1312   total_counts += n_vertices;
1313   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1314   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1315   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1316   /* local to boundary numbering */
1317   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1318   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1319   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1320   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1321   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1322   /* get local part of global near null space vectors */
1323   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1324   for (k=0;k<nnsp_size;k++) {
1325     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1326     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328   }
1329 
1330   /* whether or not to skip lapack calls */
1331   skip_lapack = PETSC_TRUE;
1332   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1333 
1334   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1335   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1336     PetscScalar temp_work;
1337 #if defined(PETSC_MISSING_LAPACK_GESVD)
1338     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1339     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1340     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1341     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1342 #if defined(PETSC_USE_COMPLEX)
1343     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1344 #endif
1345     /* now we evaluate the optimal workspace using query with lwork=-1 */
1346     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1347     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1348     lwork = -1;
1349     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1350 #if !defined(PETSC_USE_COMPLEX)
1351     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1352 #else
1353     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1354 #endif
1355     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1356     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1357 #else /* on missing GESVD */
1358     /* SVD */
1359     PetscInt max_n,min_n;
1360     max_n = max_size_of_constraint;
1361     min_n = max_constraints;
1362     if (max_size_of_constraint < max_constraints) {
1363       min_n = max_size_of_constraint;
1364       max_n = max_constraints;
1365     }
1366     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1367 #if defined(PETSC_USE_COMPLEX)
1368     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1369 #endif
1370     /* now we evaluate the optimal workspace using query with lwork=-1 */
1371     lwork = -1;
1372     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1373     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1374     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1375     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1376 #if !defined(PETSC_USE_COMPLEX)
1377     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1378 #else
1379     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1380 #endif
1381     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1382     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1383 #endif /* on missing GESVD */
1384     /* Allocate optimal workspace */
1385     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1386     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1387   }
1388   /* Now we can loop on constraining sets */
1389   total_counts = 0;
1390   temp_indices[0] = 0;
1391   /* vertices */
1392   if (ISForVertices) {
1393     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1394     if (nnsp_has_cnst) { /* consider all vertices */
1395       for (i=0;i<n_vertices;i++) {
1396         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1397         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1398         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1399         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1400         total_counts++;
1401       }
1402     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1403       PetscBool used_vertex;
1404       for (i=0;i<n_vertices;i++) {
1405         used_vertex = PETSC_FALSE;
1406         k = 0;
1407         while (!used_vertex && k<nnsp_size) {
1408           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1409           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1410             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1411             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1412             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1413             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1414             total_counts++;
1415             used_vertex = PETSC_TRUE;
1416           }
1417           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1418           k++;
1419         }
1420       }
1421     }
1422     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1423     n_vertices = total_counts;
1424   }
1425 
1426   /* edges and faces */
1427   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1428     if (i<n_ISForEdges) {
1429       used_IS = &ISForEdges[i];
1430       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1431     } else {
1432       used_IS = &ISForFaces[i-n_ISForEdges];
1433       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1434     }
1435     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1436     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1437     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1438     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1439     /* change of basis should not be performed on local periodic nodes */
1440     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1441     if (nnsp_has_cnst) {
1442       PetscScalar quad_value;
1443       temp_constraints++;
1444       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1445       for (j=0;j<size_of_constraint;j++) {
1446         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1447         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1448         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1449       }
1450       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1451       total_counts++;
1452     }
1453     for (k=0;k<nnsp_size;k++) {
1454       PetscReal real_value;
1455       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1456       for (j=0;j<size_of_constraint;j++) {
1457         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1458         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1459         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1460       }
1461       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1462       /* check if array is null on the connected component */
1463       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1464       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1465       if (real_value > 0.0) { /* keep indices and values */
1466         temp_constraints++;
1467         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1468         total_counts++;
1469       }
1470     }
1471     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1472     valid_constraints = temp_constraints;
1473     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1474     if (!pcbddc->use_nnsp_true && temp_constraints) {
1475       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1476 
1477 #if defined(PETSC_MISSING_LAPACK_GESVD)
1478       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1479          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1480          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1481             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1482             from that computed using LAPACKgesvd
1483          -> This is due to a different computation of eigenvectors in LAPACKheev
1484          -> The quality of the POD-computed basis will be the same */
1485       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1486       /* Store upper triangular part of correlation matrix */
1487       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1488       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1489       for (j=0;j<temp_constraints;j++) {
1490         for (k=0;k<j+1;k++) {
1491           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1492         }
1493       }
1494       /* compute eigenvalues and eigenvectors of correlation matrix */
1495       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1496       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1497 #if !defined(PETSC_USE_COMPLEX)
1498       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1499 #else
1500       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1501 #endif
1502       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1503       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1504       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1505       j=0;
1506       while (j < temp_constraints && singular_vals[j] < tol) j++;
1507       total_counts=total_counts-j;
1508       valid_constraints = temp_constraints-j;
1509       /* scale and copy POD basis into used quadrature memory */
1510       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1511       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1512       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1513       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1514       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1515       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1516       if (j<temp_constraints) {
1517         PetscInt ii;
1518         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1519         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1520         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1521         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1522         for (k=0;k<temp_constraints-j;k++) {
1523           for (ii=0;ii<size_of_constraint;ii++) {
1524             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1525           }
1526         }
1527       }
1528 #else  /* on missing GESVD */
1529       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1530       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1531       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1532       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1533 #if !defined(PETSC_USE_COMPLEX)
1534       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1535 #else
1536       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1537 #endif
1538       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1539       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1540       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1541       k = temp_constraints;
1542       if (k > size_of_constraint) k = size_of_constraint;
1543       j = 0;
1544       while (j < k && singular_vals[k-j-1] < tol) j++;
1545       total_counts = total_counts-temp_constraints+k-j;
1546       valid_constraints = k-j;
1547 #endif /* on missing GESVD */
1548     }
1549     /* setting change_of_basis flag is safe now */
1550     if (boolforchange) {
1551       for (j=0;j<valid_constraints;j++) {
1552         PetscBTSet(change_basis,total_counts-j-1);
1553       }
1554     }
1555   }
1556   /* free index sets of faces, edges and vertices */
1557   for (i=0;i<n_ISForFaces;i++) {
1558     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1559   }
1560   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1561   for (i=0;i<n_ISForEdges;i++) {
1562     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1563   }
1564   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1565   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1566 
1567   /* free workspace */
1568   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1569     ierr = PetscFree(work);CHKERRQ(ierr);
1570 #if defined(PETSC_USE_COMPLEX)
1571     ierr = PetscFree(rwork);CHKERRQ(ierr);
1572 #endif
1573     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1574 #if defined(PETSC_MISSING_LAPACK_GESVD)
1575     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1576     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1577 #endif
1578   }
1579   for (k=0;k<nnsp_size;k++) {
1580     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1581   }
1582   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1583 
1584   /* set quantities in pcbddc data structure */
1585   /* n_vertices defines the number of subdomain corners in the primal space */
1586   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1587   pcbddc->local_primal_size = total_counts;
1588   pcbddc->n_vertices = n_vertices;
1589   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1590 
1591   /* Create constraint matrix */
1592   /* The constraint matrix is used to compute the l2g map of primal dofs */
1593   /* so we need to set it up properly either with or without change of basis */
1594   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1595   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1596   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1597   /* array to compute a local numbering of constraints : vertices first then constraints */
1598   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1599   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1600   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1601   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1602   /* auxiliary stuff for basis change */
1603   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1604   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1605 
1606   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1607   total_primal_vertices=0;
1608   for (i=0;i<pcbddc->local_primal_size;i++) {
1609     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1610     if (size_of_constraint == 1) {
1611       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1612       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1613       aux_primal_minloc[total_primal_vertices]=0;
1614       total_primal_vertices++;
1615     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1616       PetscInt min_loc,min_index;
1617       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1618       /* find first untouched local node */
1619       k = 0;
1620       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1621       min_index = global_indices[k];
1622       min_loc = k;
1623       /* search the minimum among global nodes already untouched on the cc */
1624       for (k=1;k<size_of_constraint;k++) {
1625         /* there can be more than one constraint on a single connected component */
1626         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1627           min_index = global_indices[k];
1628           min_loc = k;
1629         }
1630       }
1631       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1632       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1633       aux_primal_minloc[total_primal_vertices]=min_loc;
1634       total_primal_vertices++;
1635     }
1636   }
1637   /* determine if a QR strategy is needed for change of basis */
1638   qr_needed = PETSC_FALSE;
1639   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1640   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1641     if (PetscBTLookup(change_basis,i)) {
1642       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1643       j = 0;
1644       for (k=0;k<size_of_constraint;k++) {
1645         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1646           j++;
1647         }
1648       }
1649       /* found more than one primal dof on the cc */
1650       if (j > 1) {
1651         PetscBTSet(qr_needed_idx,i);
1652         qr_needed = PETSC_TRUE;
1653       }
1654     }
1655   }
1656   /* free workspace */
1657   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1658 
1659   /* permute indices in order to have a sorted set of vertices */
1660   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1661 
1662   /* nonzero structure of constraint matrix */
1663   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1664   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1665   j=total_primal_vertices;
1666   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1667     if (!PetscBTLookup(change_basis,i)) {
1668       nnz[j]=temp_indices[i+1]-temp_indices[i];
1669       j++;
1670     }
1671   }
1672   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1673   ierr = PetscFree(nnz);CHKERRQ(ierr);
1674   /* set values in constraint matrix */
1675   for (i=0;i<total_primal_vertices;i++) {
1676     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1677   }
1678   total_counts = total_primal_vertices;
1679   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1680     if (!PetscBTLookup(change_basis,i)) {
1681       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1682       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1683       total_counts++;
1684     }
1685   }
1686   /* assembling */
1687   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   /*
1690   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1691   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1692   */
1693   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1694   if (pcbddc->use_change_of_basis) {
1695     /* dual and primal dofs on a single cc */
1696     PetscInt     dual_dofs,primal_dofs;
1697     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1698     PetscInt     primal_counter;
1699     /* working stuff for GEQRF */
1700     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1701     PetscBLASInt lqr_work;
1702     /* working stuff for UNGQR */
1703     PetscScalar  *gqr_work,lgqr_work_t;
1704     PetscBLASInt lgqr_work;
1705     /* working stuff for TRTRS */
1706     PetscScalar  *trs_rhs;
1707     PetscBLASInt Blas_NRHS;
1708     /* pointers for values insertion into change of basis matrix */
1709     PetscInt     *start_rows,*start_cols;
1710     PetscScalar  *start_vals;
1711     /* working stuff for values insertion */
1712     PetscBT      is_primal;
1713 
1714     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1715     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1716     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1717     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1718     /* work arrays */
1719     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1720     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1721     /* nonzeros per row */
1722     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1723       if (PetscBTLookup(change_basis,i)) {
1724         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1725         if (PetscBTLookup(qr_needed_idx,i)) {
1726           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1727         } else {
1728           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1729           /* get local primal index on the cc */
1730           j = 0;
1731           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1732           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1733         }
1734       }
1735     }
1736     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1737     ierr = PetscFree(nnz);CHKERRQ(ierr);
1738     /* Set initial identity in the matrix */
1739     for (i=0;i<pcis->n_B;i++) {
1740       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1741     }
1742 
1743     if (pcbddc->dbg_flag) {
1744       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1745       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1746     }
1747 
1748 
1749     /* Now we loop on the constraints which need a change of basis */
1750     /*
1751        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1752        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1753 
1754        Basic blocks of change of basis matrix T computed
1755 
1756           - Using the following block transformation if there is only a primal dof on the cc
1757             (in the example, primal dof is the last one of the edge in LOCAL ordering
1758              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1759             | 1        0   ...        0     1 |
1760             | 0        1   ...        0     1 |
1761             |              ...                |
1762             | 0        ...            1     1 |
1763             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1764 
1765           - via QR decomposition of constraints otherwise
1766     */
1767     if (qr_needed) {
1768       /* space to store Q */
1769       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1770       /* first we issue queries for optimal work */
1771       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1772       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1773       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1774       lqr_work = -1;
1775       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1776       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1777       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1778       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1779       lgqr_work = -1;
1780       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1781       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1782       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1783       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1784       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1785       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1786       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1787       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1788       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1789       /* array to store scaling factors for reflectors */
1790       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1791       /* array to store rhs and solution of triangular solver */
1792       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1793       /* allocating workspace for check */
1794       if (pcbddc->dbg_flag) {
1795         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1796       }
1797     }
1798     /* array to store whether a node is primal or not */
1799     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1800     for (i=0;i<total_primal_vertices;i++) {
1801       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1802     }
1803 
1804     /* loop on constraints and see whether or not they need a change of basis and compute it */
1805     /* -> using implicit ordering contained in temp_indices data */
1806     total_counts = pcbddc->n_vertices;
1807     primal_counter = total_counts;
1808     while (total_counts<pcbddc->local_primal_size) {
1809       primal_dofs = 1;
1810       if (PetscBTLookup(change_basis,total_counts)) {
1811         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1812         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1813           primal_dofs++;
1814         }
1815         /* get constraint info */
1816         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1817         dual_dofs = size_of_constraint-primal_dofs;
1818 
1819         if (pcbddc->dbg_flag) {
1820           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1821         }
1822 
1823         if (primal_dofs > 1) { /* QR */
1824 
1825           /* copy quadrature constraints for change of basis check */
1826           if (pcbddc->dbg_flag) {
1827             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1828           }
1829           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1830           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1831 
1832           /* compute QR decomposition of constraints */
1833           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1834           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1835           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1836           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1837           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1838           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1839           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1840 
1841           /* explictly compute R^-T */
1842           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1843           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1844           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1845           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1846           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1847           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1848           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1849           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1850           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1851           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1852 
1853           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1854           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1855           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1856           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1857           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1858           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1859           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1860           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1861           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1862 
1863           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1864              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1865              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1866           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1868           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1869           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1870           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1871           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1872           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1873           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1874           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1875           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1876 
1877           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1878           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1879           /* insert cols for primal dofs */
1880           for (j=0;j<primal_dofs;j++) {
1881             start_vals = &qr_basis[j*size_of_constraint];
1882             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1883             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1884           }
1885           /* insert cols for dual dofs */
1886           for (j=0,k=0;j<dual_dofs;k++) {
1887             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1888               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1889               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1890               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1891               j++;
1892             }
1893           }
1894 
1895           /* check change of basis */
1896           if (pcbddc->dbg_flag) {
1897             PetscInt   ii,jj;
1898             PetscBool valid_qr=PETSC_TRUE;
1899             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1900             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1901             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1902             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1903             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1904             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1905             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1906             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1907             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1908             for (jj=0;jj<size_of_constraint;jj++) {
1909               for (ii=0;ii<primal_dofs;ii++) {
1910                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1911                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1912               }
1913             }
1914             if (!valid_qr) {
1915               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1916               for (jj=0;jj<size_of_constraint;jj++) {
1917                 for (ii=0;ii<primal_dofs;ii++) {
1918                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1919                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1920                   }
1921                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1922                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1923                   }
1924                 }
1925               }
1926             } else {
1927               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1928             }
1929           }
1930         } else { /* simple transformation block */
1931           PetscInt row,col;
1932           PetscScalar val;
1933           for (j=0;j<size_of_constraint;j++) {
1934             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1935             if (!PetscBTLookup(is_primal,row)) {
1936               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1937               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1938               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1939             } else {
1940               for (k=0;k<size_of_constraint;k++) {
1941                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1942                 if (row != col) {
1943                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1944                 } else {
1945                   val = 1.0;
1946                 }
1947                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1948               }
1949             }
1950           }
1951         }
1952         /* increment primal counter */
1953         primal_counter += primal_dofs;
1954       } else {
1955         if (pcbddc->dbg_flag) {
1956           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1957         }
1958       }
1959       /* increment constraint counter total_counts */
1960       total_counts += primal_dofs;
1961     }
1962 
1963     /* free workspace */
1964     if (qr_needed) {
1965       if (pcbddc->dbg_flag) {
1966         ierr = PetscFree(work);CHKERRQ(ierr);
1967       }
1968       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1969       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1970       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1971       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1972       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1973     }
1974     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1975     /* assembling */
1976     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1977     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1978     /*
1979     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1980     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1981     */
1982   }
1983 
1984   /* flush dbg viewer */
1985   if (pcbddc->dbg_flag) {
1986     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1987   }
1988 
1989   /* free workspace */
1990   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1991   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1992   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1993   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1994   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1995   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1996   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1997   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1998   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1999   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2005 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2006 {
2007   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2008   PC_IS       *pcis = (PC_IS*)pc->data;
2009   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2010   PetscInt    bs,ierr,i,vertex_size;
2011   PetscViewer viewer=pcbddc->dbg_viewer;
2012 
2013   PetscFunctionBegin;
2014   /* Init local Graph struct */
2015   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2016 
2017   /* Check validity of the csr graph passed in by the user */
2018   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2019     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2020   }
2021 
2022   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2023   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2024     Mat mat_adj;
2025     const PetscInt *xadj,*adjncy;
2026     PetscBool flg_row=PETSC_TRUE;
2027 
2028     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2029     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2030     if (!flg_row) {
2031       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2032     }
2033     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2034     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2035     if (!flg_row) {
2036       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2037     }
2038     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2039   }
2040 
2041   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2042   vertex_size = 1;
2043   if (!pcbddc->user_provided_isfordofs) {
2044     if (!pcbddc->n_ISForDofs) {
2045       IS *custom_ISForDofs;
2046 
2047       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2048       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2049       for (i=0;i<bs;i++) {
2050         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2051       }
2052       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2053       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2054       /* remove my references to IS objects */
2055       for (i=0;i<bs;i++) {
2056         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2057       }
2058       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2059     }
2060   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2061     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2062   }
2063 
2064   /* Setup of Graph */
2065   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2066 
2067   /* Graph's connected components analysis */
2068   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2069 
2070   /* print some info to stdout */
2071   if (pcbddc->dbg_flag) {
2072     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2073   }
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2079 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2080 {
2081   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2082   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   n = 0;
2087   vertices = 0;
2088   if (pcbddc->ConstraintMatrix) {
2089     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2090     for (i=0;i<local_primal_size;i++) {
2091       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2092       if (size_of_constraint == 1) n++;
2093       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2094     }
2095     if (vertices_idx) {
2096       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2097       n = 0;
2098       for (i=0;i<local_primal_size;i++) {
2099         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2100         if (size_of_constraint == 1) {
2101           vertices[n++]=row_cmat_indices[0];
2102         }
2103         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2104       }
2105     }
2106   }
2107   *n_vertices = n;
2108   if (vertices_idx) *vertices_idx = vertices;
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2114 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2115 {
2116   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2117   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2118   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2119   PetscBT        touched;
2120   PetscErrorCode ierr;
2121 
2122     /* This function assumes that the number of local constraints per connected component
2123        is not greater than the number of nodes defined for the connected component
2124        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2125   PetscFunctionBegin;
2126   n = 0;
2127   constraints_index = 0;
2128   if (pcbddc->ConstraintMatrix) {
2129     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2130     max_size_of_constraint = 0;
2131     for (i=0;i<local_primal_size;i++) {
2132       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2133       if (size_of_constraint > 1) {
2134         n++;
2135       }
2136       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2137       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2138     }
2139     if (constraints_idx) {
2140       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2141       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2142       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2143       n = 0;
2144       for (i=0;i<local_primal_size;i++) {
2145         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2146         if (size_of_constraint > 1) {
2147           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2148           /* find first untouched local node */
2149           j = 0;
2150           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2151           min_index = row_cmat_global_indices[j];
2152           min_loc = j;
2153           /* search the minimum among nodes not yet touched on the connected component
2154              since there can be more than one constraint on a single cc */
2155           for (j=1;j<size_of_constraint;j++) {
2156             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2157               min_index = row_cmat_global_indices[j];
2158               min_loc = j;
2159             }
2160           }
2161           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2162           constraints_index[n++] = row_cmat_indices[min_loc];
2163         }
2164         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2165       }
2166       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2167       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2168     }
2169   }
2170   *n_constraints = n;
2171   if (constraints_idx) *constraints_idx = constraints_index;
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 /* the next two functions has been adapted from pcis.c */
2176 #undef __FUNCT__
2177 #define __FUNCT__ "PCBDDCApplySchur"
2178 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2179 {
2180   PetscErrorCode ierr;
2181   PC_IS          *pcis = (PC_IS*)(pc->data);
2182 
2183   PetscFunctionBegin;
2184   if (!vec2_B) { vec2_B = v; }
2185   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2186   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2187   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2188   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2189   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2195 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2196 {
2197   PetscErrorCode ierr;
2198   PC_IS          *pcis = (PC_IS*)(pc->data);
2199 
2200   PetscFunctionBegin;
2201   if (!vec2_B) { vec2_B = v; }
2202   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2203   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2204   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2205   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2206   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "PCBDDCSubsetNumbering"
2212 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2213 {
2214   Vec            local_vec,global_vec;
2215   IS             seqis,paris;
2216   VecScatter     scatter_ctx;
2217   PetscScalar    *array;
2218   PetscInt       *temp_global_dofs;
2219   PetscScalar    globalsum;
2220   PetscInt       i,j,s;
2221   PetscInt       nlocals,first_index,old_index,max_local;
2222   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2223   PetscMPIInt    *dof_sizes,*dof_displs;
2224   PetscBool      first_found;
2225   PetscErrorCode ierr;
2226 
2227   PetscFunctionBegin;
2228   /* mpi buffers */
2229   MPI_Comm_size(comm,&size_prec_comm);
2230   MPI_Comm_rank(comm,&rank_prec_comm);
2231   j = ( !rank_prec_comm ? size_prec_comm : 0);
2232   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2233   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2234   /* get maximum size of subset */
2235   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2236   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2237   max_local = 0;
2238   if (n_local_dofs) {
2239     max_local = temp_global_dofs[0];
2240     for (i=1;i<n_local_dofs;i++) {
2241       if (max_local < temp_global_dofs[i] ) {
2242         max_local = temp_global_dofs[i];
2243       }
2244     }
2245   }
2246   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2247   max_global++;
2248   max_local = 0;
2249   if (n_local_dofs) {
2250     max_local = local_dofs[0];
2251     for (i=1;i<n_local_dofs;i++) {
2252       if (max_local < local_dofs[i] ) {
2253         max_local = local_dofs[i];
2254       }
2255     }
2256   }
2257   max_local++;
2258   /* allocate workspace */
2259   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2260   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2261   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2262   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2263   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2264   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2265   /* create scatter */
2266   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2267   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2268   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2269   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2270   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2271   /* init array */
2272   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2273   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2274   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2275   if (local_dofs_mult) {
2276     for (i=0;i<n_local_dofs;i++) {
2277       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2278     }
2279   } else {
2280     for (i=0;i<n_local_dofs;i++) {
2281       array[local_dofs[i]]=1.0;
2282     }
2283   }
2284   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2285   /* scatter into global vec and get total number of global dofs */
2286   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2287   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2288   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2289   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2290   /* Fill global_vec with cumulative function for global numbering */
2291   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2292   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2293   nlocals = 0;
2294   first_index = -1;
2295   first_found = PETSC_FALSE;
2296   for (i=0;i<s;i++) {
2297     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2298       first_found = PETSC_TRUE;
2299       first_index = i;
2300     }
2301     nlocals += (PetscInt)PetscRealPart(array[i]);
2302   }
2303   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2304   if (!rank_prec_comm) {
2305     dof_displs[0]=0;
2306     for (i=1;i<size_prec_comm;i++) {
2307       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2308     }
2309   }
2310   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2311   if (first_found) {
2312     array[first_index] += (PetscScalar)nlocals;
2313     old_index = first_index;
2314     for (i=first_index+1;i<s;i++) {
2315       if (PetscRealPart(array[i]) > 0.0) {
2316         array[i] += array[old_index];
2317         old_index = i;
2318       }
2319     }
2320   }
2321   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2322   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2323   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2324   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2325   /* get global ordering of local dofs */
2326   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2327   if (local_dofs_mult) {
2328     for (i=0;i<n_local_dofs;i++) {
2329       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2330     }
2331   } else {
2332     for (i=0;i<n_local_dofs;i++) {
2333       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2334     }
2335   }
2336   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2337   /* free workspace */
2338   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2339   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2340   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2341   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2342   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2343   /* return pointer to global ordering of local dofs */
2344   *global_numbering_subset = temp_global_dofs;
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2350 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2351 {
2352   PetscInt       i,j;
2353   PetscScalar    *alphas;
2354   PetscErrorCode ierr;
2355 
2356   PetscFunctionBegin;
2357   /* this implements stabilized Gram-Schmidt */
2358   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2359   for (i=0;i<n;i++) {
2360     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2361     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2362     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2363   }
2364   ierr = PetscFree(alphas);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatConvert_IS_AIJ"
2371 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2372 {
2373   Mat new_mat;
2374   Mat_IS *matis = (Mat_IS*)(mat->data);
2375   /* info on mat */
2376   PetscInt bs,rows,cols;
2377   PetscInt lrows,lcols;
2378   PetscInt local_rows,local_cols;
2379   PetscMPIInt nsubdomains;
2380   /* preallocation */
2381   Vec vec_dnz,vec_onz;
2382   PetscScalar *my_dnz,*my_onz,*array;
2383   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2384   PetscInt  index_row,index_col,owner;
2385   PetscInt  *local_indices,*global_indices;
2386   /* work */
2387   PetscInt i,j;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   /* CHECKS */
2392   /* get info from mat */
2393   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2394   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2395   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2396   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2397 
2398   /* MAT_INITIAL_MATRIX */
2399   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2400   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2401   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2402   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2403   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2404   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2405 
2406   /*
2407     preallocation
2408   */
2409 
2410   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2411   /*
2412      Some vectors are needed to sum up properly on shared interface dofs.
2413      Note that preallocation is not exact, since it overestimates nonzeros
2414   */
2415 /*
2416   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2417   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2418   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2419   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2420 */
2421   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2422   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2423   /* All processes need to compute entire row ownership */
2424   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2425   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2426   for (i=0;i<nsubdomains;i++) {
2427     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2428       row_ownership[j]=i;
2429     }
2430   }
2431   /* map indices of local mat to global values */
2432   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2433   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2434   for (i=0;i<local_rows;i++) local_indices[i]=i;
2435   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2436   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2437 
2438   /*
2439      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2440      then, they will be summed up properly. This way, preallocation is always sufficient
2441   */
2442   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2443   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2444   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2445   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2446   for (i=0;i<local_rows;i++) {
2447     index_row = global_indices[i];
2448     for (j=i;j<local_rows;j++) {
2449       owner = row_ownership[index_row];
2450       index_col = global_indices[j];
2451       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2452         my_dnz[i] += 1.0;
2453       } else { /* offdiag block */
2454         my_onz[i] += 1.0;
2455       }
2456       /* same as before, interchanging rows and cols */
2457       if (i != j) {
2458         owner = row_ownership[index_col];
2459         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2460           my_dnz[j] += 1.0;
2461         } else {
2462           my_onz[j] += 1.0;
2463         }
2464       }
2465     }
2466   }
2467   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2468   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2469   if (local_rows) { /* multilevel guard */
2470     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2471     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2472   }
2473   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2474   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2475   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2476   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2477   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2478   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2479   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2480 
2481   /* set computed preallocation in dnz and onz */
2482   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2483   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2484   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2485   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2486   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2487   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2488   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2489   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2490 
2491   /* Resize preallocation if overestimated */
2492   for (i=0;i<lrows;i++) {
2493     dnz[i] = PetscMin(dnz[i],lcols);
2494     onz[i] = PetscMin(onz[i],cols-lcols);
2495   }
2496   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2497   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2498 
2499   /*
2500     Set values. Very Basic.
2501   */
2502   for (i=0;i<local_rows;i++) {
2503     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2504     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2505     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2506     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2507     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2508   }
2509   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2510   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2511   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2512 
2513   /* get back mat */
2514   *M = new_mat;
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatISSubassemble_Private"
2520 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2521 {
2522   Mat             subdomain_adj;
2523   IS              new_ranks,ranks_send_to;
2524   MatPartitioning partitioner;
2525   Mat_IS          *matis;
2526   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2527   PetscInt        prank;
2528   PetscMPIInt     size,rank,color;
2529   PetscInt        *xadj,*adjncy,*oldranks;
2530   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2531   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2532   PetscErrorCode  ierr;
2533   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2534   PetscSubcomm    subcomm;
2535 
2536   PetscFunctionBegin;
2537   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2538   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2539   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2540 
2541   /* Get info on mapping */
2542   matis = (Mat_IS*)(mat->data);
2543   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2544   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2545 
2546   /* build local CSR graph of subdomains' connectivity */
2547   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2548   xadj[0] = 0;
2549   xadj[1] = PetscMax(n_neighs-1,0);
2550   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2551   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2552 
2553   if (threshold) {
2554     PetscInt* count,min_threshold;
2555     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2556     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2557     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2558       for (j=0;j<n_shared[i];j++) {
2559         count[shared[i][j]] += 1;
2560       }
2561     }
2562     /* adapt threshold since we dont want to lose significant connections */
2563     min_threshold = n_neighs;
2564     for (i=1;i<n_neighs;i++) {
2565       for (j=0;j<n_shared[i];j++) {
2566         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2567       }
2568     }
2569     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2570     threshold = PetscMax(min_threshold+1,threshold);
2571     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2572     xadj[1] = 0;
2573     for (i=1;i<n_neighs;i++) {
2574       for (j=0;j<n_shared[i];j++) {
2575         if (count[shared[i][j]] < threshold) {
2576           adjncy[xadj[1]] = neighs[i];
2577           adjncy_wgt[xadj[1]] = n_shared[i];
2578           xadj[1]++;
2579           break;
2580         }
2581       }
2582     }
2583     ierr = PetscFree(count);CHKERRQ(ierr);
2584   } else {
2585     if (xadj[1]) {
2586       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2587       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2588     }
2589   }
2590   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2591   if (use_square) {
2592     for (i=0;i<xadj[1];i++) {
2593       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2594     }
2595   }
2596   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2597 
2598   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2599 
2600   /*
2601     Restrict work on active processes only.
2602   */
2603   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2604   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2605   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2606   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2607   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2608   if (color) {
2609     ierr = PetscFree(xadj);CHKERRQ(ierr);
2610     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2611     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2612   } else {
2613     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2614     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2615     prank = rank;
2616     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2617     for (i=0;i<size;i++) {
2618       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2619     }
2620     for (i=0;i<xadj[1];i++) {
2621       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2622     }
2623     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2624     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2625     n_subdomains = (PetscInt)size;
2626     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2627 
2628     /* Partition */
2629     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2630     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2631     if (use_vwgt) {
2632       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2633       v_wgt[0] = local_size;
2634       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2635     }
2636     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2637     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2638     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2639     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2640     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2641 
2642     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2643     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2644     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2645     /* clean up */
2646     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2647     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2648     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2649     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2650   }
2651   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2652 
2653   /* assemble parallel IS for sends */
2654   i = 1;
2655   if (color) i=0;
2656   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2657   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2658 
2659   /* get back IS */
2660   *is_sends = ranks_send_to;
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2665 
2666 #undef __FUNCT__
2667 #define __FUNCT__ "MatISSubassemble"
2668 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2669 {
2670   Mat                    local_mat,new_mat;
2671   Mat_IS                 *matis;
2672   IS                     is_sends_internal;
2673   PetscInt               rows,cols;
2674   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2675   PetscBool              ismatis,isdense;
2676   ISLocalToGlobalMapping l2gmap;
2677   PetscInt*              l2gmap_indices;
2678   const PetscInt*        is_indices;
2679   MatType                new_local_type;
2680   MatTypePrivate         new_local_type_private;
2681   /* buffers */
2682   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2683   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2684   /* MPI */
2685   MPI_Comm               comm;
2686   PetscMPIInt            n_sends,n_recvs,commsize;
2687   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2688   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2689   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2690   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2691   PetscErrorCode         ierr;
2692 
2693   PetscFunctionBegin;
2694   /* checks */
2695   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2696   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2697   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2698   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2699   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2700   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2701   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2702   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2703   PetscValidLogicalCollectiveInt(mat,bs,0);
2704   /* prepare IS for sending if not provided */
2705   if (!is_sends) {
2706     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2707     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2708   } else {
2709     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2710     is_sends_internal = is_sends;
2711   }
2712 
2713   /* get pointer of MATIS data */
2714   matis = (Mat_IS*)mat->data;
2715 
2716   /* get comm */
2717   comm = PetscObjectComm((PetscObject)mat);
2718 
2719   /* compute number of sends */
2720   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2721   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2722 
2723   /* compute number of receives */
2724   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2725   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2726   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2727   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2728   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2729   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2730   ierr = PetscFree(iflags);CHKERRQ(ierr);
2731 
2732   /* prepare send/receive buffers */
2733   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2734   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2735   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2736   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2737 
2738   /* Get data from local mat */
2739   if (!isdense) {
2740     /* TODO: See below some guidelines on how to prepare the local buffers */
2741     /*
2742        send_buffer_vals should contain the raw values of the local matrix
2743        send_buffer_idxs should contain:
2744        - MatType_PRIVATE type
2745        - PetscInt        size_of_l2gmap
2746        - PetscInt        global_row_indices[size_of_l2gmap]
2747        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2748     */
2749   } else {
2750     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2751     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2752     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2753     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2754     send_buffer_idxs[1] = i;
2755     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2756     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2757     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2758     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2759     for (i=0;i<n_sends;i++) {
2760       ilengths_vals[is_indices[i]] = len*len;
2761       ilengths_idxs[is_indices[i]] = len+2;
2762     }
2763   }
2764   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2765   buf_size_idxs = 0;
2766   buf_size_vals = 0;
2767   for (i=0;i<n_recvs;i++) {
2768     buf_size_idxs += (PetscInt)olengths_idxs[i];
2769     buf_size_vals += (PetscInt)olengths_vals[i];
2770   }
2771   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2772   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2773 
2774   /* get new tags for clean communications */
2775   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2776   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2777 
2778   /* allocate for requests */
2779   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2780   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2781   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2782   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2783 
2784   /* communications */
2785   ptr_idxs = recv_buffer_idxs;
2786   ptr_vals = recv_buffer_vals;
2787   for (i=0;i<n_recvs;i++) {
2788     source_dest = onodes[i];
2789     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2790     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2791     ptr_idxs += olengths_idxs[i];
2792     ptr_vals += olengths_vals[i];
2793   }
2794   for (i=0;i<n_sends;i++) {
2795     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2796     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2797     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2798   }
2799   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2800   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2801 
2802   /* assemble new l2g map */
2803   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2804   ptr_idxs = recv_buffer_idxs;
2805   buf_size_idxs = 0;
2806   for (i=0;i<n_recvs;i++) {
2807     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2808     ptr_idxs += olengths_idxs[i];
2809   }
2810   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2811   ptr_idxs = recv_buffer_idxs;
2812   buf_size_idxs = 0;
2813   for (i=0;i<n_recvs;i++) {
2814     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2815     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2816     ptr_idxs += olengths_idxs[i];
2817   }
2818   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2819   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2820   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2821 
2822   /* infer new local matrix type from received local matrices type */
2823   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2824   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2825   new_local_type_private = MATAIJ_PRIVATE;
2826   new_local_type = MATSEQAIJ;
2827   if (n_recvs) {
2828     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2829     ptr_idxs = recv_buffer_idxs;
2830     for (i=0;i<n_recvs;i++) {
2831       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2832         new_local_type_private = MATAIJ_PRIVATE;
2833         break;
2834       }
2835       ptr_idxs += olengths_idxs[i];
2836     }
2837     switch (new_local_type_private) {
2838       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2839         new_local_type = MATSEQAIJ;
2840         bs = 1;
2841         break;
2842       case MATAIJ_PRIVATE:
2843         new_local_type = MATSEQAIJ;
2844         bs = 1;
2845         break;
2846       case MATBAIJ_PRIVATE:
2847         new_local_type = MATSEQBAIJ;
2848         break;
2849       case MATSBAIJ_PRIVATE:
2850         new_local_type = MATSEQSBAIJ;
2851         break;
2852       default:
2853         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2854         break;
2855     }
2856   }
2857 
2858   /* create MATIS object */
2859   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2860   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2861   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2862   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2863   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2864   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2865 
2866   /* set values */
2867   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2868   ptr_vals = recv_buffer_vals;
2869   ptr_idxs = recv_buffer_idxs;
2870   for (i=0;i<n_recvs;i++) {
2871     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2872       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2873       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2874       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2875       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2876       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2877     }
2878     ptr_idxs += olengths_idxs[i];
2879     ptr_vals += olengths_vals[i];
2880   }
2881   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2882   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2884   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2885 
2886   { /* check */
2887     Vec       lvec,rvec;
2888     PetscReal infty_error;
2889 
2890     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2891     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2892     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2893     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2894     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2895     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2896     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2897     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2898     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2899   }
2900 
2901   /* free workspace */
2902   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2903   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2904   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2905   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2906   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2907   if (isdense) {
2908     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2909     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2910   } else {
2911     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2912   }
2913   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2914   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2915   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2916   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2917   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2918   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2919   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2920   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2921   ierr = PetscFree(onodes);CHKERRQ(ierr);
2922   /* get back new mat */
2923   *mat_n = new_mat;
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 #undef __FUNCT__
2928 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2929 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2930 {
2931   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2932   PC_IS                  *pcis = (PC_IS*)pc->data;
2933   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2934   MatNullSpace           CoarseNullSpace=NULL;
2935   ISLocalToGlobalMapping coarse_islg;
2936   IS                     coarse_is;
2937   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2938   PetscInt               im_active=-1,active_procs=-1;
2939   PC                     pc_temp;
2940   PCType                 coarse_pc_type;
2941   KSPType                coarse_ksp_type;
2942   PetscBool              multilevel_requested,multilevel_allowed;
2943   PetscBool              setsym,issym,isbddc,isnn;
2944   MatStructure           matstruct;
2945   PetscErrorCode         ierr;
2946 
2947   PetscFunctionBegin;
2948   /* Assign global numbering to coarse dofs */
2949   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2950 
2951   /* infer some info from user */
2952   issym = PETSC_FALSE;
2953   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2954   multilevel_allowed = PETSC_FALSE;
2955   multilevel_requested = PETSC_FALSE;
2956   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2957   if (multilevel_requested) {
2958     /* count "active processes" */
2959     im_active = !!(pcis->n);
2960     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2961     if (active_procs/pcbddc->coarsening_ratio < 2) {
2962       multilevel_allowed = PETSC_FALSE;
2963     } else {
2964       multilevel_allowed = PETSC_TRUE;
2965     }
2966   }
2967 
2968   /* set defaults for coarse KSP and PC */
2969   if (multilevel_allowed) {
2970     if (issym) {
2971       coarse_ksp_type = KSPRICHARDSON;
2972     } else {
2973       coarse_ksp_type = KSPCHEBYSHEV;
2974     }
2975     coarse_pc_type = PCBDDC;
2976   } else {
2977     coarse_ksp_type = KSPPREONLY;
2978     coarse_pc_type = PCREDUNDANT;
2979   }
2980 
2981   /* create the coarse KSP object only once with defaults */
2982   if (!pcbddc->coarse_ksp) {
2983     char prefix[256],str_level[3];
2984     size_t len;
2985     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2986     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2987     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2988     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2989     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2990     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2991     /* prefix */
2992     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2993     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2994     if (!pcbddc->current_level) {
2995       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2996       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2997     } else {
2998       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2999       if (pcbddc->current_level>1) len -= 2;
3000       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3001       *(prefix+len)='\0';
3002       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3003       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3004     }
3005     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3006   }
3007   /* allow user customization */
3008   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3009   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3010   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3011 
3012   /* get some info after set from options */
3013   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3014   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3015   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3016   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3017     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3018     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3019     isbddc = PETSC_FALSE;
3020   }
3021 
3022   /* propagate BDDC info to the next level */
3023   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3024   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3025   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3026 
3027   /* creates temporary MATIS object for coarse matrix */
3028   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3029   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3030   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3031   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3032   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3033   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3034   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3035   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3036   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3037   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3038 
3039   /* assemble coarse matrix */
3040   if (isbddc || isnn) {
3041     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3042   } else {
3043     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3044   }
3045   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3046 
3047   /* create local to global scatters for coarse problem */
3048   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3049   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3050   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3051 
3052   /* propagate symmetry info to coarse matrix */
3053   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3054 
3055   /* Compute coarse null space (special handling by BDDC only) */
3056   if (pcbddc->NullSpace) {
3057     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3058     if (isbddc) {
3059       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3060     } else {
3061       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3062     }
3063   }
3064 
3065   /* set operators */
3066   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3067   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3068 
3069   /* additional KSP customization */
3070   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3071   if (max_it < 5) {
3072     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3073   }
3074   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3075 
3076 
3077   /* print some info if requested */
3078   if (pcbddc->dbg_flag) {
3079     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3080     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3081     if (!multilevel_allowed) {
3082       if (multilevel_requested) {
3083         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3084       } else if (pcbddc->max_levels) {
3085         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3086       }
3087     }
3088     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3089     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3090   }
3091 
3092   /* setup coarse ksp */
3093   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3094   if (pcbddc->dbg_flag) {
3095     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3096     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3097     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3098     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3099   }
3100 
3101   /* Check coarse problem if requested */
3102   if (pcbddc->dbg_flag) {
3103     KSP       check_ksp;
3104     KSPType   check_ksp_type;
3105     PC        check_pc;
3106     Vec       check_vec;
3107     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3108     PetscInt  its;
3109     PetscBool ispreonly,compute;
3110 
3111     /* Create ksp object suitable for estimation of extreme eigenvalues */
3112     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3113     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3114     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3115     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3116     if (ispreonly) {
3117       check_ksp_type = KSPPREONLY;
3118       compute = PETSC_FALSE;
3119     } else {
3120       if (issym) check_ksp_type = KSPCG;
3121       else check_ksp_type = KSPGMRES;
3122       compute = PETSC_TRUE;
3123     }
3124     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3125     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3126     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3127     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3128     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3129     /* create random vec */
3130     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3131     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3132     if (CoarseNullSpace) {
3133       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3134     }
3135     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3136     /* solve coarse problem */
3137     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3138     if (CoarseNullSpace) {
3139       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3140     }
3141     /* check coarse problem residual error */
3142     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3143     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3144     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3145     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3146     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3147     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3148     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3149     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3150     /* get eigenvalue estimation if preonly has not been requested */
3151     if (!ispreonly) {
3152       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3153       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3154       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3155     }
3156     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3157     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3158   }
3159   /* free memory */
3160   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3161   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 
3165 #undef __FUNCT__
3166 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3167 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3168 {
3169   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3170   PC_IS*         pcis = (PC_IS*)pc->data;
3171   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3172   PetscInt       i,j,coarse_size;
3173   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3174   PetscErrorCode ierr;
3175 
3176   PetscFunctionBegin;
3177   /* get indices in local ordering for vertices and constraints */
3178   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3179   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3180   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3181   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3182   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3183   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3184   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3185 
3186   /* Compute global number of coarse dofs */
3187   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3188 
3189   /* check numbering */
3190   if (pcbddc->dbg_flag) {
3191     PetscScalar coarsesum,*array;
3192 
3193     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3194     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3195     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3196     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3197     for (i=0;i<pcbddc->local_primal_size;i++) {
3198       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3199     }
3200     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3201     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3202     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3203     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3204     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3205     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3206     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3207     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3208     for (i=0;i<pcis->n;i++) {
3209       if (array[i] == 1.0) {
3210         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3211       }
3212     }
3213     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3214     for (i=0;i<pcis->n;i++) {
3215       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3216     }
3217     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3218     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3219     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3220     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3221     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3222     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3223     if (pcbddc->dbg_flag > 1) {
3224       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3225       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3226       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3227       for (i=0;i<pcbddc->local_primal_size;i++) {
3228         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3229       }
3230       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3231     }
3232     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3233   }
3234   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3235   /* get back data */
3236   *coarse_size_n = coarse_size;
3237   *local_primal_indices_n = local_primal_indices;
3238   PetscFunctionReturn(0);
3239 }
3240 
3241