xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 20c7b3777a47d415eaeb8f3a00b7b3a6baecb6f6)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
166   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183 
184   /* Allocating some extra storage just to be safe */
185   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
186   for (i=0;i<pcis->n;i++) auxindices[i]=i;
187 
188   /* vertices in boundary numbering */
189   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
191   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
193   if (i != n_vertices) {
194     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
195   }
196 
197   /* Precompute stuffs needed for preprocessing and application of BDDC*/
198   if (n_constraints) {
199     /* work vectors */
200     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
201     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
202     /* auxiliary matrices */
203     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
204     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
205     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
206     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
207 
208     /* Extract constraints on R nodes: C_{CR}  */
209     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
210     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
211     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
212 
213     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
214     for (i=0;i<n_constraints;i++) {
215       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
216       /* Get row of constraint matrix in R numbering */
217       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
219       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
221       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
222       /* Solve for row of constraint matrix in R numbering */
223       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
224       /* Set values in local_auxmat2 */
225       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
227       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228     }
229     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
232 
233     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
234     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
235     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
236     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
237     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
238     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
239     ierr = MatSetUp(M1);CHKERRQ(ierr);
240     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
241     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
242     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
243     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
244     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
245     ierr = MatDestroy(&M2);CHKERRQ(ierr);
246     ierr = MatDestroy(&M3);CHKERRQ(ierr);
247     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
248     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
249   }
250 
251   /* Get submatrices from subdomain matrix */
252   if (n_vertices) {
253     PetscInt ibs,mbs;
254     PetscBool issbaij;
255     Mat newmat;
256 
257     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
258     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
259     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
260     if (ibs != mbs) { /* need to convert to SEQAIJ */
261       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
263       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
265       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
266     } else {
267       /* this is safe */
268       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
269       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
270       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
271         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272         /* which of the two approaches is faster? */
273         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
275         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
277         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
278       } else {
279         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
280         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
281       }
282     }
283     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
284     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
285     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
286   }
287 
288   /* Matrix of coarse basis functions (local) */
289   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
290   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
291   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
292   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
293   if (pcbddc->switch_static || pcbddc->dbg_flag) {
294     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
295     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
296     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
297     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
298   }
299 
300   if (pcbddc->dbg_flag) {
301     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
303     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
304   }
305   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
306   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
307 
308   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
309 
310   /* vertices */
311   for (i=0;i<n_vertices;i++) {
312     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
313     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
314     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
315     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
317     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
318     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
319     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
320     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
321     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
322     if (n_constraints) {
323       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
324       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
325       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
326     }
327     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
328     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
329 
330     /* Set values in coarse basis function and subdomain part of coarse_mat */
331     /* coarse basis functions */
332     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
333     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
335     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
337     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
339     if (pcbddc->switch_static || pcbddc->dbg_flag) {
340       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
341       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
342       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345     }
346     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
347     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
349     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     if (n_constraints) {
351       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354     }
355 
356     /* check */
357     if (pcbddc->dbg_flag) {
358       /* assemble subdomain vector on local nodes */
359       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
365       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
366       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
367       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
368       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
370       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       if (n_constraints) {
372         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
373         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
374         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
375       }
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
379       /* check saddle point solution */
380       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
381       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
382       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
383       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
384       /* shift by the identity matrix */
385       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
386       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
387       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   /* constraints */
393   for (i=0;i<n_constraints;i++) {
394     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
395     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
397     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
398     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
399     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
400     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
401     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
402     if (n_vertices) {
403       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
404     }
405     /* Set values in coarse basis function and subdomain part of coarse_mat */
406     /* coarse basis functions */
407     j = i+n_vertices; /* don't touch this! */
408     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
409     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
410     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
411     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     if (pcbddc->switch_static || pcbddc->dbg_flag) {
415       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420     }
421     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
422     if (n_vertices) {
423       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     }
427     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
428     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
429     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
430 
431     if (pcbddc->dbg_flag) {
432       /* assemble subdomain vector on nodes */
433       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
434       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
439       /* assemble subdomain vector of lagrange multipliers */
440       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
441       if (n_vertices) {
442         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
443         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
444         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
445       }
446       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
448       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
452       /* check saddle point solution */
453       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
454       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
455       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
456       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
457       /* shift by the identity matrix */
458       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
459       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
460       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
462     }
463   }
464   /* call assembling routines for local coarse basis */
465   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   if (pcbddc->switch_static || pcbddc->dbg_flag) {
468     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471 
472   /* compute other basis functions for non-symmetric problems */
473   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
474   if (!setsym || (setsym && !issym)) {
475     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
477     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
478     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
481       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
482       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
483       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
484     }
485     for (i=0;i<pcbddc->local_primal_size;i++) {
486       if (n_constraints) {
487         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
488         for (j=0;j<n_constraints;j++) {
489           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
490         }
491         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
492         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
493       }
494       if (i<n_vertices) {
495         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
496         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
497         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
498         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
499         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
500         if (n_constraints) {
501           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
502         }
503       } else {
504         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
505       }
506       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       if (i<n_vertices) {
514         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
515       }
516       if (pcbddc->switch_static || pcbddc->dbg_flag) {
517         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522       }
523 
524       if (pcbddc->dbg_flag) {
525         /* assemble subdomain vector on nodes */
526         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
527         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
529         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         if (i<n_vertices) {
531           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
532         }
533         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
534         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
535         /* assemble subdomain vector of lagrange multipliers */
536         for (j=0;j<pcbddc->local_primal_size;j++) {
537           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
538         }
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         /* check saddle point solution */
542         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
543         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
544         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
545         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
546         /* shift by the identity matrix */
547         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
548         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
549         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
550         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
551       }
552     }
553     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     if (pcbddc->switch_static || pcbddc->dbg_flag) {
556       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     }
559     unsymmetric_check = PETSC_TRUE;
560   } else { /* take references to already computed coarse basis */
561     unsymmetric_check = PETSC_FALSE;
562     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
563     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
564     if (pcbddc->coarse_phi_D) {
565       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
566       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
567     }
568   }
569   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
570   /* Checking coarse_sub_mat and coarse basis functios */
571   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
572   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
573   if (pcbddc->dbg_flag) {
574     Mat         coarse_sub_mat;
575     Mat         AUXMAT,TM1,TM2,TM3,TM4;
576     Mat         coarse_phi_D,coarse_phi_B;
577     Mat         coarse_psi_D,coarse_psi_B;
578     Mat         A_II,A_BB,A_IB,A_BI;
579     MatType     checkmattype=MATSEQAIJ;
580     PetscReal   real_value;
581 
582     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
583     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
584     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
585     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
586     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
587     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
588     if (unsymmetric_check) {
589       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
590       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
591     }
592     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
593 
594     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
596     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
597     if (unsymmetric_check) {
598       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
602       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
608       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
610     } else {
611       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
612       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
613       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
614       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
615       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
616       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
617       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
618       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
619     }
620     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
621     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
622     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
623     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
624     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
625     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
626     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
627     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
628     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
629     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
630     for (i=0;i<pcbddc->local_primal_size;i++) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
632     }
633     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
634     for (i=0;i<pcbddc->local_primal_size;i++) {
635       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
636     }
637     if (unsymmetric_check) {
638       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
639       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
640         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
641       }
642       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
643       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
644         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
645       }
646     }
647     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
648     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
649     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
652     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
653     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
654     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
655     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
656     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
657     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
658     if (unsymmetric_check) {
659       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
660       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
661     }
662     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
663     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
664     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
665     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
666   }
667   /* free memory */
668   if (n_vertices) {
669     ierr = PetscFree(vertices);CHKERRQ(ierr);
670     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
671     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
672     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
673     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
674     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
675   }
676   if (n_constraints) {
677     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
678     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
679     ierr = MatDestroy(&M1);CHKERRQ(ierr);
680     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
681   }
682   ierr = PetscFree(auxindices);CHKERRQ(ierr);
683   /* get back data */
684   *coarse_submat_vals_n = coarse_submat_vals;
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
691 {
692   PC_IS*            pcis = (PC_IS*)(pc->data);
693   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
694   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
695   PetscBool         issbaij,isbaij;
696   /* manage repeated solves */
697   MatReuse          reuse;
698   MatStructure      matstruct;
699   PetscErrorCode    ierr;
700 
701   PetscFunctionBegin;
702   /* get mat flags */
703   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
704   reuse = MAT_INITIAL_MATRIX;
705   if (pc->setupcalled) {
706     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
707     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
708     if (matstruct == SAME_NONZERO_PATTERN) {
709       reuse = MAT_REUSE_MATRIX;
710     } else {
711       reuse = MAT_INITIAL_MATRIX;
712     }
713   }
714   if (reuse == MAT_INITIAL_MATRIX) {
715     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
716     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
717     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
718     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
719     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
720   }
721 
722   /* transform local matrices if needed */
723   if (pcbddc->use_change_of_basis) {
724     Mat         change_mat_all;
725     PetscScalar *row_cmat_values;
726     PetscInt    *row_cmat_indices;
727     PetscInt    *nnz,*is_indices,*temp_indices;
728     PetscInt    i,j,k,n_D,n_B;
729 
730     /* Get Non-overlapping dimensions */
731     n_B = pcis->n_B;
732     n_D = pcis->n-n_B;
733 
734     /* compute nonzero structure of change of basis on all local nodes */
735     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
736     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
738     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     k=1;
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
743       nnz[is_indices[i]]=j;
744       if (k < j) k = j;
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
746     }
747     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     /* assemble change of basis matrix on the whole set of local dofs */
749     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
750     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
751     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
752     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
753     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
754     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
755     for (i=0;i<n_D;i++) {
756       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
757     }
758     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
759     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
760     for (i=0;i<n_B;i++) {
761       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
762       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
763       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
764       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
765     }
766     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
768     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
769     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
770     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
771     if (!issbaij && !isbaij) {
772       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
773     } else {
774       Mat work_mat;
775       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
776       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
777       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
778     }
779     /*
780     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
781     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
782     */
783     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
784     ierr = PetscFree(nnz);CHKERRQ(ierr);
785     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
786   } else {
787     /* without change of basis, the local matrix is unchanged */
788     if (!pcbddc->local_mat) {
789       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
790       pcbddc->local_mat = matis->A;
791     }
792   }
793 
794   /* get submatrices */
795   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
796   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
797   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
798   if (!issbaij) {
799     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
800     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
801   } else {
802     Mat newmat;
803     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
804     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
805     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
806     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
814 {
815   PC_IS*         pcis = (PC_IS*)(pc->data);
816   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
817   IS             is_aux1,is_aux2;
818   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
819   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
820   PetscInt       vbs,bs;
821   PetscBT        bitmask;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   /* Set Non-overlapping dimensions */
826   n_B = pcis->n_B; n_D = pcis->n - n_B;
827   /* get vertex indices from constraint matrix */
828   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
829 
830   /* create auxiliary bitmask */
831   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
832   for (i=0;i<n_vertices;i++) {
833     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
834   }
835 
836   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
837   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
838   for (i=0, n_R=0; i<pcis->n; i++) {
839     if (!PetscBTLookup(bitmask,i)) {
840       idx_R_local[n_R] = i;
841       n_R++;
842     }
843   }
844 
845   /* Block code */
846   vbs = 1;
847   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
848   if (bs>1 && !(n_vertices%bs)) {
849     PetscBool is_blocked = PETSC_TRUE;
850     PetscInt  *vary;
851     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
852     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
853     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
854     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
855     for (i=0; i<n_vertices; i++) {
856       if (vary[i]!=0 && vary[i]!=bs) {
857         is_blocked = PETSC_FALSE;
858         break;
859       }
860     }
861     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
862       vbs = bs;
863       for (i=0;i<n_R/vbs;i++) {
864         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
865       }
866     }
867     ierr = PetscFree(vary);CHKERRQ(ierr);
868   }
869   ierr = PetscFree(vertices);CHKERRQ(ierr);
870   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
871   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
872 
873   /* print some info if requested */
874   if (pcbddc->dbg_flag) {
875     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
876     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
877     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
878     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
879     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
880     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
881     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
882   }
883 
884   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
885   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
886   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
887   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
888   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
889   for (i=0; i<n_D; i++) {
890     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
891   }
892   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
893   for (i=0, j=0; i<n_R; i++) {
894     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
895       aux_array1[j++] = i;
896     }
897   }
898   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
899   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
900   for (i=0, j=0; i<n_B; i++) {
901     if (!PetscBTLookup(bitmask,is_indices[i])) {
902       aux_array2[j++] = i;
903     }
904   }
905   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
906   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
907   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
908   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
909   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
910 
911   if (pcbddc->switch_static || pcbddc->dbg_flag) {
912     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
913     for (i=0, j=0; i<n_R; i++) {
914       if (PetscBTLookup(bitmask,idx_R_local[i])) {
915         aux_array1[j++] = i;
916       }
917     }
918     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
919     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
920     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
921   }
922   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
923   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
931 {
932   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
933   PC_IS          *pcis = (PC_IS*)pc->data;
934   PC             pc_temp;
935   Mat            A_RR;
936   Vec            vec1,vec2,vec3;
937   MatStructure   matstruct;
938   PetscScalar    m_one = -1.0;
939   PetscReal      value;
940   PetscInt       n_D,n_R,ibs,mbs;
941   PetscBool      use_exact,use_exact_reduced,issbaij;
942   PetscErrorCode ierr;
943   /* prefixes stuff */
944   char           dir_prefix[256],neu_prefix[256],str_level[3];
945   size_t         len;
946 
947   PetscFunctionBegin;
948   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
949 
950   /* compute prefixes */
951   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
952   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
953   if (!pcbddc->current_level) {
954     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
955     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
956     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
957     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
958   } else {
959     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
960     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
961     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
962     len -= 15; /* remove "pc_bddc_coarse_" */
963     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
964     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
965     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
966     *(dir_prefix+len)='\0';
967     *(neu_prefix+len)='\0';
968     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
969     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
970     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
971     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
972   }
973 
974   /* DIRICHLET PROBLEM */
975   /* Matrix for Dirichlet problem is pcis->A_II */
976   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
977   if (!pcbddc->ksp_D) { /* create object if not yet build */
978     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
979     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
980     /* default */
981     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
982     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
983     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
984     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
985     if (issbaij) {
986       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
987     } else {
988       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
989     }
990     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
991   }
992   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
993   /* Allow user's customization */
994   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
995   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
996   if (!n_D) {
997     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
998     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
999   }
1000   /* Set Up KSP for Dirichlet problem of BDDC */
1001   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1002   /* set ksp_D into pcis data */
1003   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1004   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1005   pcis->ksp_D = pcbddc->ksp_D;
1006 
1007   /* NEUMANN PROBLEM */
1008   /* Matrix for Neumann problem is A_RR -> we need to create it */
1009   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1010   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1011   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1012   if (ibs != mbs) {
1013     Mat newmat;
1014     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1015     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1016     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1017   } else {
1018     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1019   }
1020   if (!pcbddc->ksp_R) { /* create object if not yet build */
1021     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1022     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1023     /* default */
1024     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1025     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1026     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1027     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1028     if (issbaij) {
1029       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1030     } else {
1031       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1032     }
1033     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1034   }
1035   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1036   /* Allow user's customization */
1037   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1038   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1039   if (!n_R) {
1040     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1041     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1042   }
1043   /* Set Up KSP for Neumann problem of BDDC */
1044   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1045 
1046   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1047 
1048   /* Dirichlet */
1049   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1050   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1051   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1052   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1053   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1054   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1055   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1056   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1057   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1058   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1059   /* need to be adapted? */
1060   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1061   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1062   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1063   /* print info */
1064   if (pcbddc->dbg_flag) {
1065     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1066     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1067     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1068     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1069     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1070   }
1071   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1072     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1073   }
1074 
1075   /* Neumann */
1076   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1077   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1078   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1079   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1080   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1081   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1082   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1083   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1084   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1085   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1086   /* need to be adapted? */
1087   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1088   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1089   /* print info */
1090   if (pcbddc->dbg_flag) {
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1092     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1093   }
1094   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1095     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1096   }
1097   /* free Neumann problem's matrix */
1098   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1104 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1105 {
1106   PetscErrorCode ierr;
1107   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1108 
1109   PetscFunctionBegin;
1110   if (applytranspose) {
1111     if (pcbddc->local_auxmat1) {
1112       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1113       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1114     }
1115     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1116   } else {
1117     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1118     if (pcbddc->local_auxmat1) {
1119       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1120       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1121     }
1122   }
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1127 #undef __FUNCT__
1128 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1129 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1130 {
1131   PetscErrorCode ierr;
1132   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1133   PC_IS*            pcis = (PC_IS*)  (pc->data);
1134   const PetscScalar zero = 0.0;
1135 
1136   PetscFunctionBegin;
1137   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1138   if (applytranspose) {
1139     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1140     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1141   } else {
1142     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1143     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1144   }
1145   /* Scatter data of coarse_rhs */
1146   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1147   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148 
1149   /* Local solution on R nodes */
1150   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1151   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   if (pcbddc->switch_static) {
1154     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156   }
1157   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1158   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1159   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161   if (pcbddc->switch_static) {
1162     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1163     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1164   }
1165 
1166   /* Coarse solution */
1167   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1168   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1169     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1170   }
1171   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1173 
1174   /* Sum contributions from two levels */
1175   if (applytranspose) {
1176     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1177     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1178   } else {
1179     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1180     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1187 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1188 {
1189   PetscErrorCode ierr;
1190   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1191 
1192   PetscFunctionBegin;
1193   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1199 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1200 {
1201   PetscErrorCode ierr;
1202   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1203 
1204   PetscFunctionBegin;
1205   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /* uncomment for testing purposes */
1210 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1211 #undef __FUNCT__
1212 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1213 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1214 {
1215   PetscErrorCode    ierr;
1216   PC_IS*            pcis = (PC_IS*)(pc->data);
1217   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1218   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1219   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1220   MatType           impMatType=MATSEQAIJ;
1221   /* one and zero */
1222   PetscScalar       one=1.0,zero=0.0;
1223   /* space to store constraints and their local indices */
1224   PetscScalar       *temp_quadrature_constraint;
1225   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1226   /* iterators */
1227   PetscInt          i,j,k,total_counts,temp_start_ptr;
1228   /* stuff to store connected components stored in pcbddc->mat_graph */
1229   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1230   PetscInt          n_ISForFaces,n_ISForEdges;
1231   /* near null space stuff */
1232   MatNullSpace      nearnullsp;
1233   const Vec         *nearnullvecs;
1234   Vec               *localnearnullsp;
1235   PetscBool         nnsp_has_cnst;
1236   PetscInt          nnsp_size;
1237   PetscScalar       *array;
1238   /* BLAS integers */
1239   PetscBLASInt      lwork,lierr;
1240   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1241   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1242   /* LAPACK working arrays for SVD or POD */
1243   PetscBool         skip_lapack;
1244   PetscScalar       *work;
1245   PetscReal         *singular_vals;
1246 #if defined(PETSC_USE_COMPLEX)
1247   PetscReal         *rwork;
1248 #endif
1249 #if defined(PETSC_MISSING_LAPACK_GESVD)
1250   PetscBLASInt      Blas_one_2=1;
1251   PetscScalar       *temp_basis,*correlation_mat;
1252 #else
1253   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1254   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1255 #endif
1256   /* change of basis */
1257   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1258   PetscBool         boolforchange,qr_needed;
1259   PetscBT           touched,change_basis,qr_needed_idx;
1260   /* auxiliary stuff */
1261   PetscInt          *nnz,*is_indices,*local_to_B;
1262   /* some quantities */
1263   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1264   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1265 
1266 
1267   PetscFunctionBegin;
1268   /* Get index sets for faces, edges and vertices from graph */
1269   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1270     pcbddc->use_vertices = PETSC_TRUE;
1271   }
1272   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1273   /* HACK: provide functions to set change of basis */
1274   if (!ISForVertices && pcbddc->NullSpace) {
1275     pcbddc->use_change_of_basis = PETSC_TRUE;
1276     pcbddc->use_change_on_faces = PETSC_FALSE;
1277   }
1278   /* print some info */
1279   if (pcbddc->dbg_flag) {
1280     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1281     i = 0;
1282     if (ISForVertices) {
1283       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1284     }
1285     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1288     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1289   }
1290   /* check if near null space is attached to global mat */
1291   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1292   if (nearnullsp) {
1293     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1294   } else { /* if near null space is not provided BDDC uses constants by default */
1295     nnsp_size = 0;
1296     nnsp_has_cnst = PETSC_TRUE;
1297   }
1298   /* get max number of constraints on a single cc */
1299   max_constraints = nnsp_size;
1300   if (nnsp_has_cnst) max_constraints++;
1301 
1302   /*
1303        Evaluate maximum storage size needed by the procedure
1304        - temp_indices will contain start index of each constraint stored as follows
1305        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1306        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1307        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1308                                                                                                                                                          */
1309   total_counts = n_ISForFaces+n_ISForEdges;
1310   total_counts *= max_constraints;
1311   n_vertices = 0;
1312   if (ISForVertices) {
1313     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1314   }
1315   total_counts += n_vertices;
1316   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1317   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1318   total_counts = 0;
1319   max_size_of_constraint = 0;
1320   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1321     if (i<n_ISForEdges) {
1322       used_IS = &ISForEdges[i];
1323     } else {
1324       used_IS = &ISForFaces[i-n_ISForEdges];
1325     }
1326     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1327     total_counts += j;
1328     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1329   }
1330   total_counts *= max_constraints;
1331   total_counts += n_vertices;
1332   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1333   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1334   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1335   /* local to boundary numbering */
1336   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1337   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1338   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1339   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1340   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   /* get local part of global near null space vectors */
1342   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1343   for (k=0;k<nnsp_size;k++) {
1344     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1345     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1346     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1347   }
1348 
1349   /* whether or not to skip lapack calls */
1350   skip_lapack = PETSC_TRUE;
1351   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1352 
1353   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1354   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1355     PetscScalar temp_work;
1356 #if defined(PETSC_MISSING_LAPACK_GESVD)
1357     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1358     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1359     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1360     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1361 #if defined(PETSC_USE_COMPLEX)
1362     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1363 #endif
1364     /* now we evaluate the optimal workspace using query with lwork=-1 */
1365     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1366     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1367     lwork = -1;
1368     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1369 #if !defined(PETSC_USE_COMPLEX)
1370     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1371 #else
1372     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1373 #endif
1374     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1375     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1376 #else /* on missing GESVD */
1377     /* SVD */
1378     PetscInt max_n,min_n;
1379     max_n = max_size_of_constraint;
1380     min_n = max_constraints;
1381     if (max_size_of_constraint < max_constraints) {
1382       min_n = max_size_of_constraint;
1383       max_n = max_constraints;
1384     }
1385     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1386 #if defined(PETSC_USE_COMPLEX)
1387     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1388 #endif
1389     /* now we evaluate the optimal workspace using query with lwork=-1 */
1390     lwork = -1;
1391     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1392     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1393     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1394     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1395 #if !defined(PETSC_USE_COMPLEX)
1396     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1397 #else
1398     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1399 #endif
1400     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1401     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1402 #endif /* on missing GESVD */
1403     /* Allocate optimal workspace */
1404     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1405     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1406   }
1407   /* Now we can loop on constraining sets */
1408   total_counts = 0;
1409   temp_indices[0] = 0;
1410   /* vertices */
1411   if (ISForVertices) {
1412     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1413     if (nnsp_has_cnst) { /* consider all vertices */
1414       for (i=0;i<n_vertices;i++) {
1415         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1416         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1417         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1418         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1419         total_counts++;
1420       }
1421     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1422       PetscBool used_vertex;
1423       for (i=0;i<n_vertices;i++) {
1424         used_vertex = PETSC_FALSE;
1425         k = 0;
1426         while (!used_vertex && k<nnsp_size) {
1427           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1428           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1429             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1430             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1431             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1432             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1433             total_counts++;
1434             used_vertex = PETSC_TRUE;
1435           }
1436           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1437           k++;
1438         }
1439       }
1440     }
1441     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1442     n_vertices = total_counts;
1443   }
1444 
1445   /* edges and faces */
1446   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1447     if (i<n_ISForEdges) {
1448       used_IS = &ISForEdges[i];
1449       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1450     } else {
1451       used_IS = &ISForFaces[i-n_ISForEdges];
1452       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1453     }
1454     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1455     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1456     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1457     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1458     /* change of basis should not be performed on local periodic nodes */
1459     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1460     if (nnsp_has_cnst) {
1461       PetscScalar quad_value;
1462       temp_constraints++;
1463       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1464       for (j=0;j<size_of_constraint;j++) {
1465         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1466         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1467         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1468       }
1469       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1470       total_counts++;
1471     }
1472     for (k=0;k<nnsp_size;k++) {
1473       PetscReal real_value;
1474       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1475       for (j=0;j<size_of_constraint;j++) {
1476         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1477         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1478         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1479       }
1480       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1481       /* check if array is null on the connected component */
1482       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1483       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1484       if (real_value > 0.0) { /* keep indices and values */
1485         temp_constraints++;
1486         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1487         total_counts++;
1488       }
1489     }
1490     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1491     valid_constraints = temp_constraints;
1492     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1493     if (!pcbddc->use_nnsp_true && temp_constraints) {
1494       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1495 
1496 #if defined(PETSC_MISSING_LAPACK_GESVD)
1497       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1498          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1499          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1500             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1501             from that computed using LAPACKgesvd
1502          -> This is due to a different computation of eigenvectors in LAPACKheev
1503          -> The quality of the POD-computed basis will be the same */
1504       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1505       /* Store upper triangular part of correlation matrix */
1506       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1507       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1508       for (j=0;j<temp_constraints;j++) {
1509         for (k=0;k<j+1;k++) {
1510           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1511         }
1512       }
1513       /* compute eigenvalues and eigenvectors of correlation matrix */
1514       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1515       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1516 #if !defined(PETSC_USE_COMPLEX)
1517       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1518 #else
1519       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1520 #endif
1521       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1522       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1523       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1524       j=0;
1525       while (j < temp_constraints && singular_vals[j] < tol) j++;
1526       total_counts=total_counts-j;
1527       valid_constraints = temp_constraints-j;
1528       /* scale and copy POD basis into used quadrature memory */
1529       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1530       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1531       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1532       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1533       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1534       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1535       if (j<temp_constraints) {
1536         PetscInt ii;
1537         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1538         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1539         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1540         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1541         for (k=0;k<temp_constraints-j;k++) {
1542           for (ii=0;ii<size_of_constraint;ii++) {
1543             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1544           }
1545         }
1546       }
1547 #else  /* on missing GESVD */
1548       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1549       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1550       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1551       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1552 #if !defined(PETSC_USE_COMPLEX)
1553       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1554 #else
1555       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1556 #endif
1557       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1558       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1559       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1560       k = temp_constraints;
1561       if (k > size_of_constraint) k = size_of_constraint;
1562       j = 0;
1563       while (j < k && singular_vals[k-j-1] < tol) j++;
1564       total_counts = total_counts-temp_constraints+k-j;
1565       valid_constraints = k-j;
1566 #endif /* on missing GESVD */
1567     }
1568     /* setting change_of_basis flag is safe now */
1569     if (boolforchange) {
1570       for (j=0;j<valid_constraints;j++) {
1571         PetscBTSet(change_basis,total_counts-j-1);
1572       }
1573     }
1574   }
1575   /* free index sets of faces, edges and vertices */
1576   for (i=0;i<n_ISForFaces;i++) {
1577     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1578   }
1579   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1580   for (i=0;i<n_ISForEdges;i++) {
1581     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1582   }
1583   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1584   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1585 
1586   /* free workspace */
1587   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1588     ierr = PetscFree(work);CHKERRQ(ierr);
1589 #if defined(PETSC_USE_COMPLEX)
1590     ierr = PetscFree(rwork);CHKERRQ(ierr);
1591 #endif
1592     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1593 #if defined(PETSC_MISSING_LAPACK_GESVD)
1594     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1595     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1596 #endif
1597   }
1598   for (k=0;k<nnsp_size;k++) {
1599     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1600   }
1601   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1602 
1603   /* set quantities in pcbddc data structure */
1604   /* n_vertices defines the number of subdomain corners in the primal space */
1605   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1606   pcbddc->local_primal_size = total_counts;
1607   pcbddc->n_vertices = n_vertices;
1608   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1609 
1610   /* Create constraint matrix */
1611   /* The constraint matrix is used to compute the l2g map of primal dofs */
1612   /* so we need to set it up properly either with or without change of basis */
1613   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1614   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1615   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1616   /* array to compute a local numbering of constraints : vertices first then constraints */
1617   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1618   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1619   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1620   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1621   /* auxiliary stuff for basis change */
1622   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1623   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1624 
1625   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1626   total_primal_vertices=0;
1627   for (i=0;i<pcbddc->local_primal_size;i++) {
1628     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1629     if (size_of_constraint == 1) {
1630       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1631       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1632       aux_primal_minloc[total_primal_vertices]=0;
1633       total_primal_vertices++;
1634     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1635       PetscInt min_loc,min_index;
1636       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1637       /* find first untouched local node */
1638       k = 0;
1639       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1640       min_index = global_indices[k];
1641       min_loc = k;
1642       /* search the minimum among global nodes already untouched on the cc */
1643       for (k=1;k<size_of_constraint;k++) {
1644         /* there can be more than one constraint on a single connected component */
1645         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1646           min_index = global_indices[k];
1647           min_loc = k;
1648         }
1649       }
1650       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1651       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1652       aux_primal_minloc[total_primal_vertices]=min_loc;
1653       total_primal_vertices++;
1654     }
1655   }
1656   /* determine if a QR strategy is needed for change of basis */
1657   qr_needed = PETSC_FALSE;
1658   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1659   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1660     if (PetscBTLookup(change_basis,i)) {
1661       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1662       j = 0;
1663       for (k=0;k<size_of_constraint;k++) {
1664         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1665           j++;
1666         }
1667       }
1668       /* found more than one primal dof on the cc */
1669       if (j > 1) {
1670         PetscBTSet(qr_needed_idx,i);
1671         qr_needed = PETSC_TRUE;
1672       }
1673     }
1674   }
1675   /* free workspace */
1676   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1677 
1678   /* permute indices in order to have a sorted set of vertices */
1679   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1680 
1681   /* nonzero structure of constraint matrix */
1682   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1683   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1684   j=total_primal_vertices;
1685   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1686     if (!PetscBTLookup(change_basis,i)) {
1687       nnz[j]=temp_indices[i+1]-temp_indices[i];
1688       j++;
1689     }
1690   }
1691   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1692   ierr = PetscFree(nnz);CHKERRQ(ierr);
1693   /* set values in constraint matrix */
1694   for (i=0;i<total_primal_vertices;i++) {
1695     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1696   }
1697   total_counts = total_primal_vertices;
1698   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1699     if (!PetscBTLookup(change_basis,i)) {
1700       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1701       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1702       total_counts++;
1703     }
1704   }
1705   /* assembling */
1706   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1708   /*
1709   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1710   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1711   */
1712   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1713   if (pcbddc->use_change_of_basis) {
1714     /* dual and primal dofs on a single cc */
1715     PetscInt     dual_dofs,primal_dofs;
1716     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1717     PetscInt     primal_counter;
1718     /* working stuff for GEQRF */
1719     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1720     PetscBLASInt lqr_work;
1721     /* working stuff for UNGQR */
1722     PetscScalar  *gqr_work,lgqr_work_t;
1723     PetscBLASInt lgqr_work;
1724     /* working stuff for TRTRS */
1725     PetscScalar  *trs_rhs;
1726     PetscBLASInt Blas_NRHS;
1727     /* pointers for values insertion into change of basis matrix */
1728     PetscInt     *start_rows,*start_cols;
1729     PetscScalar  *start_vals;
1730     /* working stuff for values insertion */
1731     PetscBT      is_primal;
1732 
1733     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1734     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1735     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1736     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1737     /* work arrays */
1738     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1739     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1740     /* nonzeros per row */
1741     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1742       if (PetscBTLookup(change_basis,i)) {
1743         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1744         if (PetscBTLookup(qr_needed_idx,i)) {
1745           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1746         } else {
1747           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1748           /* get local primal index on the cc */
1749           j = 0;
1750           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1751           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1752         }
1753       }
1754     }
1755     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1756     ierr = PetscFree(nnz);CHKERRQ(ierr);
1757     /* Set initial identity in the matrix */
1758     for (i=0;i<pcis->n_B;i++) {
1759       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1760     }
1761 
1762     if (pcbddc->dbg_flag) {
1763       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1764       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1765     }
1766 
1767 
1768     /* Now we loop on the constraints which need a change of basis */
1769     /*
1770        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1771        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1772 
1773        Basic blocks of change of basis matrix T computed
1774 
1775           - Using the following block transformation if there is only a primal dof on the cc
1776             (in the example, primal dof is the last one of the edge in LOCAL ordering
1777              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1778             | 1        0   ...        0     1 |
1779             | 0        1   ...        0     1 |
1780             |              ...                |
1781             | 0        ...            1     1 |
1782             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1783 
1784           - via QR decomposition of constraints otherwise
1785     */
1786     if (qr_needed) {
1787       /* space to store Q */
1788       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1789       /* first we issue queries for optimal work */
1790       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1791       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1792       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1793       lqr_work = -1;
1794       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1795       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1796       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1797       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1798       lgqr_work = -1;
1799       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1800       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1801       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1802       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1803       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1804       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1805       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1806       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1807       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1808       /* array to store scaling factors for reflectors */
1809       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1810       /* array to store rhs and solution of triangular solver */
1811       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1812       /* allocating workspace for check */
1813       if (pcbddc->dbg_flag) {
1814         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1815       }
1816     }
1817     /* array to store whether a node is primal or not */
1818     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1819     for (i=0;i<total_primal_vertices;i++) {
1820       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1821     }
1822 
1823     /* loop on constraints and see whether or not they need a change of basis and compute it */
1824     /* -> using implicit ordering contained in temp_indices data */
1825     total_counts = pcbddc->n_vertices;
1826     primal_counter = total_counts;
1827     while (total_counts<pcbddc->local_primal_size) {
1828       primal_dofs = 1;
1829       if (PetscBTLookup(change_basis,total_counts)) {
1830         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1831         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1832           primal_dofs++;
1833         }
1834         /* get constraint info */
1835         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1836         dual_dofs = size_of_constraint-primal_dofs;
1837 
1838         if (pcbddc->dbg_flag) {
1839           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1840         }
1841 
1842         if (primal_dofs > 1) { /* QR */
1843 
1844           /* copy quadrature constraints for change of basis check */
1845           if (pcbddc->dbg_flag) {
1846             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1847           }
1848           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1849           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1850 
1851           /* compute QR decomposition of constraints */
1852           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1853           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1854           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1855           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1856           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1857           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1858           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1859 
1860           /* explictly compute R^-T */
1861           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1862           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1863           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1864           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1865           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1866           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1867           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1868           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1869           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1870           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1871 
1872           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1873           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1874           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1875           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1876           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1877           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1878           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1879           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1880           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1881 
1882           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1883              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1884              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1885           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1886           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1887           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1888           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1889           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1890           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1891           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1892           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1893           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1894           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1895 
1896           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1897           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1898           /* insert cols for primal dofs */
1899           for (j=0;j<primal_dofs;j++) {
1900             start_vals = &qr_basis[j*size_of_constraint];
1901             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1902             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1903           }
1904           /* insert cols for dual dofs */
1905           for (j=0,k=0;j<dual_dofs;k++) {
1906             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1907               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1908               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1909               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1910               j++;
1911             }
1912           }
1913 
1914           /* check change of basis */
1915           if (pcbddc->dbg_flag) {
1916             PetscInt   ii,jj;
1917             PetscBool valid_qr=PETSC_TRUE;
1918             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1919             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1920             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1921             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1922             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1923             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1924             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1925             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1926             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1927             for (jj=0;jj<size_of_constraint;jj++) {
1928               for (ii=0;ii<primal_dofs;ii++) {
1929                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1930                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1931               }
1932             }
1933             if (!valid_qr) {
1934               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1935               for (jj=0;jj<size_of_constraint;jj++) {
1936                 for (ii=0;ii<primal_dofs;ii++) {
1937                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1938                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1939                   }
1940                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1941                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1942                   }
1943                 }
1944               }
1945             } else {
1946               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1947             }
1948           }
1949         } else { /* simple transformation block */
1950           PetscInt row,col;
1951           PetscScalar val;
1952           for (j=0;j<size_of_constraint;j++) {
1953             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1954             if (!PetscBTLookup(is_primal,row)) {
1955               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1956               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1957               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1958             } else {
1959               for (k=0;k<size_of_constraint;k++) {
1960                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1961                 if (row != col) {
1962                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1963                 } else {
1964                   val = 1.0;
1965                 }
1966                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1967               }
1968             }
1969           }
1970         }
1971         /* increment primal counter */
1972         primal_counter += primal_dofs;
1973       } else {
1974         if (pcbddc->dbg_flag) {
1975           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1976         }
1977       }
1978       /* increment constraint counter total_counts */
1979       total_counts += primal_dofs;
1980     }
1981 
1982     /* free workspace */
1983     if (qr_needed) {
1984       if (pcbddc->dbg_flag) {
1985         ierr = PetscFree(work);CHKERRQ(ierr);
1986       }
1987       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1988       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1989       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1990       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1991       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1992     }
1993     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1994     /* assembling */
1995     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997     /*
1998     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1999     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2000     */
2001   }
2002 
2003   /* flush dbg viewer */
2004   if (pcbddc->dbg_flag) {
2005     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2006   }
2007 
2008   /* free workspace */
2009   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2010   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2011   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2012   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2013   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2014   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2015   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2016   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2017   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2018   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2024 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2025 {
2026   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2027   PC_IS       *pcis = (PC_IS*)pc->data;
2028   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2029   PetscInt    bs,ierr,i,vertex_size;
2030   PetscViewer viewer=pcbddc->dbg_viewer;
2031 
2032   PetscFunctionBegin;
2033   /* Init local Graph struct */
2034   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2035 
2036   /* Check validity of the csr graph passed in by the user */
2037   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2038     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2039   }
2040 
2041   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2042   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2043     Mat mat_adj;
2044     const PetscInt *xadj,*adjncy;
2045     PetscBool flg_row=PETSC_TRUE;
2046 
2047     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2048     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2049     if (!flg_row) {
2050       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2051     }
2052     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2053     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2054     if (!flg_row) {
2055       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2056     }
2057     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2058   }
2059 
2060   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2061   vertex_size = 1;
2062   if (!pcbddc->user_provided_isfordofs) {
2063     if (!pcbddc->n_ISForDofs) {
2064       IS *custom_ISForDofs;
2065 
2066       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2067       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2068       for (i=0;i<bs;i++) {
2069         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2070       }
2071       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2072       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2073       /* remove my references to IS objects */
2074       for (i=0;i<bs;i++) {
2075         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2076       }
2077       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2078     }
2079   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2080     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2081   }
2082 
2083   /* Setup of Graph */
2084   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2085 
2086   /* Graph's connected components analysis */
2087   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2088 
2089   /* print some info to stdout */
2090   if (pcbddc->dbg_flag) {
2091     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2092   }
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2098 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2099 {
2100   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2101   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2102   PetscErrorCode ierr;
2103 
2104   PetscFunctionBegin;
2105   n = 0;
2106   vertices = 0;
2107   if (pcbddc->ConstraintMatrix) {
2108     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2109     for (i=0;i<local_primal_size;i++) {
2110       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2111       if (size_of_constraint == 1) n++;
2112       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2113     }
2114     if (vertices_idx) {
2115       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2116       n = 0;
2117       for (i=0;i<local_primal_size;i++) {
2118         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2119         if (size_of_constraint == 1) {
2120           vertices[n++]=row_cmat_indices[0];
2121         }
2122         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2123       }
2124     }
2125   }
2126   *n_vertices = n;
2127   if (vertices_idx) *vertices_idx = vertices;
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2133 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2134 {
2135   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2136   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2137   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2138   PetscBT        touched;
2139   PetscErrorCode ierr;
2140 
2141     /* This function assumes that the number of local constraints per connected component
2142        is not greater than the number of nodes defined for the connected component
2143        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2144   PetscFunctionBegin;
2145   n = 0;
2146   constraints_index = 0;
2147   if (pcbddc->ConstraintMatrix) {
2148     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2149     max_size_of_constraint = 0;
2150     for (i=0;i<local_primal_size;i++) {
2151       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2152       if (size_of_constraint > 1) {
2153         n++;
2154       }
2155       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2156       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2157     }
2158     if (constraints_idx) {
2159       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2160       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2161       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2162       n = 0;
2163       for (i=0;i<local_primal_size;i++) {
2164         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2165         if (size_of_constraint > 1) {
2166           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2167           /* find first untouched local node */
2168           j = 0;
2169           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2170           min_index = row_cmat_global_indices[j];
2171           min_loc = j;
2172           /* search the minimum among nodes not yet touched on the connected component
2173              since there can be more than one constraint on a single cc */
2174           for (j=1;j<size_of_constraint;j++) {
2175             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2176               min_index = row_cmat_global_indices[j];
2177               min_loc = j;
2178             }
2179           }
2180           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2181           constraints_index[n++] = row_cmat_indices[min_loc];
2182         }
2183         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2184       }
2185       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2186       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2187     }
2188   }
2189   *n_constraints = n;
2190   if (constraints_idx) *constraints_idx = constraints_index;
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 /* the next two functions has been adapted from pcis.c */
2195 #undef __FUNCT__
2196 #define __FUNCT__ "PCBDDCApplySchur"
2197 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2198 {
2199   PetscErrorCode ierr;
2200   PC_IS          *pcis = (PC_IS*)(pc->data);
2201 
2202   PetscFunctionBegin;
2203   if (!vec2_B) { vec2_B = v; }
2204   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2205   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2206   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2207   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2208   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2214 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2215 {
2216   PetscErrorCode ierr;
2217   PC_IS          *pcis = (PC_IS*)(pc->data);
2218 
2219   PetscFunctionBegin;
2220   if (!vec2_B) { vec2_B = v; }
2221   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2222   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2223   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2224   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2225   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "PCBDDCSubsetNumbering"
2231 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2232 {
2233   Vec            local_vec,global_vec;
2234   IS             seqis,paris;
2235   VecScatter     scatter_ctx;
2236   PetscScalar    *array;
2237   PetscInt       *temp_global_dofs;
2238   PetscScalar    globalsum;
2239   PetscInt       i,j,s;
2240   PetscInt       nlocals,first_index,old_index,max_local;
2241   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2242   PetscMPIInt    *dof_sizes,*dof_displs;
2243   PetscBool      first_found;
2244   PetscErrorCode ierr;
2245 
2246   PetscFunctionBegin;
2247   /* mpi buffers */
2248   MPI_Comm_size(comm,&size_prec_comm);
2249   MPI_Comm_rank(comm,&rank_prec_comm);
2250   j = ( !rank_prec_comm ? size_prec_comm : 0);
2251   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2252   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2253   /* get maximum size of subset */
2254   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2255   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2256   max_local = 0;
2257   if (n_local_dofs) {
2258     max_local = temp_global_dofs[0];
2259     for (i=1;i<n_local_dofs;i++) {
2260       if (max_local < temp_global_dofs[i] ) {
2261         max_local = temp_global_dofs[i];
2262       }
2263     }
2264   }
2265   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2266   max_global++;
2267   max_local = 0;
2268   if (n_local_dofs) {
2269     max_local = local_dofs[0];
2270     for (i=1;i<n_local_dofs;i++) {
2271       if (max_local < local_dofs[i] ) {
2272         max_local = local_dofs[i];
2273       }
2274     }
2275   }
2276   max_local++;
2277   /* allocate workspace */
2278   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2279   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2280   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2281   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2282   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2283   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2284   /* create scatter */
2285   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2286   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2287   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2288   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2289   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2290   /* init array */
2291   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2292   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2293   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2294   if (local_dofs_mult) {
2295     for (i=0;i<n_local_dofs;i++) {
2296       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2297     }
2298   } else {
2299     for (i=0;i<n_local_dofs;i++) {
2300       array[local_dofs[i]]=1.0;
2301     }
2302   }
2303   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2304   /* scatter into global vec and get total number of global dofs */
2305   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2306   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2307   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2308   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2309   /* Fill global_vec with cumulative function for global numbering */
2310   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2311   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2312   nlocals = 0;
2313   first_index = -1;
2314   first_found = PETSC_FALSE;
2315   for (i=0;i<s;i++) {
2316     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2317       first_found = PETSC_TRUE;
2318       first_index = i;
2319     }
2320     nlocals += (PetscInt)PetscRealPart(array[i]);
2321   }
2322   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2323   if (!rank_prec_comm) {
2324     dof_displs[0]=0;
2325     for (i=1;i<size_prec_comm;i++) {
2326       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2327     }
2328   }
2329   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2330   if (first_found) {
2331     array[first_index] += (PetscScalar)nlocals;
2332     old_index = first_index;
2333     for (i=first_index+1;i<s;i++) {
2334       if (PetscRealPart(array[i]) > 0.0) {
2335         array[i] += array[old_index];
2336         old_index = i;
2337       }
2338     }
2339   }
2340   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2341   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2342   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2343   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2344   /* get global ordering of local dofs */
2345   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2346   if (local_dofs_mult) {
2347     for (i=0;i<n_local_dofs;i++) {
2348       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2349     }
2350   } else {
2351     for (i=0;i<n_local_dofs;i++) {
2352       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2353     }
2354   }
2355   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2356   /* free workspace */
2357   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2358   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2359   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2360   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2361   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2362   /* return pointer to global ordering of local dofs */
2363   *global_numbering_subset = temp_global_dofs;
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2369 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2370 {
2371   PetscInt       i,j;
2372   PetscScalar    *alphas;
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   /* this implements stabilized Gram-Schmidt */
2377   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2378   for (i=0;i<n;i++) {
2379     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2380     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2381     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2382   }
2383   ierr = PetscFree(alphas);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatConvert_IS_AIJ"
2390 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2391 {
2392   Mat new_mat;
2393   Mat_IS *matis = (Mat_IS*)(mat->data);
2394   /* info on mat */
2395   PetscInt bs,rows,cols;
2396   PetscInt lrows,lcols;
2397   PetscInt local_rows,local_cols;
2398   PetscMPIInt nsubdomains;
2399   /* preallocation */
2400   Vec vec_dnz,vec_onz;
2401   PetscScalar *my_dnz,*my_onz,*array;
2402   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2403   PetscInt  index_row,index_col,owner;
2404   PetscInt  *local_indices,*global_indices;
2405   /* work */
2406   PetscInt i,j;
2407   PetscErrorCode ierr;
2408 
2409   PetscFunctionBegin;
2410   /* CHECKS */
2411   /* get info from mat */
2412   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2413   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2414   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2415   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2416 
2417   /* MAT_INITIAL_MATRIX */
2418   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2419   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2420   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2421   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2422   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2423   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2424 
2425   /*
2426     preallocation
2427   */
2428 
2429   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2430   /*
2431      Some vectors are needed to sum up properly on shared interface dofs.
2432      Note that preallocation is not exact, since it overestimates nonzeros
2433   */
2434 /*
2435   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2436   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2437   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2438   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2439 */
2440   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2441   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2442   /* All processes need to compute entire row ownership */
2443   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2444   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2445   for (i=0;i<nsubdomains;i++) {
2446     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2447       row_ownership[j]=i;
2448     }
2449   }
2450   /* map indices of local mat to global values */
2451   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2452   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2453   for (i=0;i<local_rows;i++) local_indices[i]=i;
2454   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2455   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2456 
2457   /*
2458      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2459      then, they will be summed up properly. This way, preallocation is always sufficient
2460   */
2461   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2462   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2463   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2464   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2465   for (i=0;i<local_rows;i++) {
2466     index_row = global_indices[i];
2467     for (j=i;j<local_rows;j++) {
2468       owner = row_ownership[index_row];
2469       index_col = global_indices[j];
2470       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2471         my_dnz[i] += 1.0;
2472       } else { /* offdiag block */
2473         my_onz[i] += 1.0;
2474       }
2475       /* same as before, interchanging rows and cols */
2476       if (i != j) {
2477         owner = row_ownership[index_col];
2478         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2479           my_dnz[j] += 1.0;
2480         } else {
2481           my_onz[j] += 1.0;
2482         }
2483       }
2484     }
2485   }
2486   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2487   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2488   if (local_rows) { /* multilevel guard */
2489     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2490     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2491   }
2492   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2493   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2494   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2495   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2496   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2497   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2498   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2499 
2500   /* set computed preallocation in dnz and onz */
2501   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2502   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2503   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2504   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2505   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2506   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2507   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2508   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2509 
2510   /* Resize preallocation if overestimated */
2511   for (i=0;i<lrows;i++) {
2512     dnz[i] = PetscMin(dnz[i],lcols);
2513     onz[i] = PetscMin(onz[i],cols-lcols);
2514   }
2515   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2516   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2517 
2518   /*
2519     Set values. Very Basic.
2520   */
2521   for (i=0;i<local_rows;i++) {
2522     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2523     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2524     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2525     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2526     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2527   }
2528   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2529   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2530   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2531 
2532   /* get back mat */
2533   *M = new_mat;
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatISSubassemble_Private"
2539 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2540 {
2541   Mat             subdomain_adj;
2542   IS              new_ranks,ranks_send_to;
2543   MatPartitioning partitioner;
2544   Mat_IS          *matis;
2545   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2546   PetscInt        prank;
2547   PetscMPIInt     size,rank,color;
2548   PetscInt        *xadj,*adjncy,*oldranks;
2549   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2550   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2551   PetscErrorCode  ierr;
2552   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2553   PetscSubcomm    subcomm;
2554 
2555   PetscFunctionBegin;
2556   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2557   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2558   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2559 
2560   /* Get info on mapping */
2561   matis = (Mat_IS*)(mat->data);
2562   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2563   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2564 
2565   /* build local CSR graph of subdomains' connectivity */
2566   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2567   xadj[0] = 0;
2568   xadj[1] = PetscMax(n_neighs-1,0);
2569   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2570   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2571 
2572   if (threshold) {
2573     PetscInt* count,min_threshold;
2574     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2575     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2576     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2577       for (j=0;j<n_shared[i];j++) {
2578         count[shared[i][j]] += 1;
2579       }
2580     }
2581     /* adapt threshold since we dont want to lose significant connections */
2582     min_threshold = n_neighs;
2583     for (i=1;i<n_neighs;i++) {
2584       for (j=0;j<n_shared[i];j++) {
2585         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2586       }
2587     }
2588     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2589     threshold = PetscMax(min_threshold+1,threshold);
2590     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2591     xadj[1] = 0;
2592     for (i=1;i<n_neighs;i++) {
2593       for (j=0;j<n_shared[i];j++) {
2594         if (count[shared[i][j]] < threshold) {
2595           adjncy[xadj[1]] = neighs[i];
2596           adjncy_wgt[xadj[1]] = n_shared[i];
2597           xadj[1]++;
2598           break;
2599         }
2600       }
2601     }
2602     ierr = PetscFree(count);CHKERRQ(ierr);
2603   } else {
2604     if (xadj[1]) {
2605       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2606       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2607     }
2608   }
2609   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2610   if (use_square) {
2611     for (i=0;i<xadj[1];i++) {
2612       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2613     }
2614   }
2615   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2616 
2617   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2618 
2619   /*
2620     Restrict work on active processes only.
2621   */
2622   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2623   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2624   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2625   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2626   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2627   if (color) {
2628     ierr = PetscFree(xadj);CHKERRQ(ierr);
2629     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2630     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2631   } else {
2632     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2633     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2634     prank = rank;
2635     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2636     for (i=0;i<size;i++) {
2637       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2638     }
2639     for (i=0;i<xadj[1];i++) {
2640       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2641     }
2642     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2643     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2644     n_subdomains = (PetscInt)size;
2645     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2646 
2647     /* Partition */
2648     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2649     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2650     if (use_vwgt) {
2651       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2652       v_wgt[0] = local_size;
2653       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2654     }
2655     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2656     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2657     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2658     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2659     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2660 
2661     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2662     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2663     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2664     /* clean up */
2665     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2666     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2667     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2668     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2669   }
2670   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2671 
2672   /* assemble parallel IS for sends */
2673   i = 1;
2674   if (color) i=0;
2675   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2676   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2677 
2678   /* get back IS */
2679   *is_sends = ranks_send_to;
2680   PetscFunctionReturn(0);
2681 }
2682 
2683 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2684 
2685 #undef __FUNCT__
2686 #define __FUNCT__ "MatISSubassemble"
2687 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2688 {
2689   Mat                    local_mat,new_mat;
2690   Mat_IS                 *matis;
2691   IS                     is_sends_internal;
2692   PetscInt               rows,cols;
2693   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2694   PetscBool              ismatis,isdense;
2695   ISLocalToGlobalMapping l2gmap;
2696   PetscInt*              l2gmap_indices;
2697   const PetscInt*        is_indices;
2698   MatType                new_local_type;
2699   MatTypePrivate         new_local_type_private;
2700   /* buffers */
2701   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2702   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2703   /* MPI */
2704   MPI_Comm               comm;
2705   PetscMPIInt            n_sends,n_recvs,commsize;
2706   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2707   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2708   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2709   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2710   PetscErrorCode         ierr;
2711 
2712   PetscFunctionBegin;
2713   /* checks */
2714   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2715   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2716   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2717   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2718   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2719   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2720   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2721   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2722   PetscValidLogicalCollectiveInt(mat,bs,0);
2723   /* prepare IS for sending if not provided */
2724   if (!is_sends) {
2725     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2726     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2727   } else {
2728     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2729     is_sends_internal = is_sends;
2730   }
2731 
2732   /* get pointer of MATIS data */
2733   matis = (Mat_IS*)mat->data;
2734 
2735   /* get comm */
2736   comm = PetscObjectComm((PetscObject)mat);
2737 
2738   /* compute number of sends */
2739   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2740   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2741 
2742   /* compute number of receives */
2743   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2744   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2745   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2746   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2747   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2748   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2749   ierr = PetscFree(iflags);CHKERRQ(ierr);
2750 
2751   /* prepare send/receive buffers */
2752   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2753   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2754   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2755   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2756 
2757   /* Get data from local mat */
2758   if (!isdense) {
2759     /* TODO: See below some guidelines on how to prepare the local buffers */
2760     /*
2761        send_buffer_vals should contain the raw values of the local matrix
2762        send_buffer_idxs should contain:
2763        - MatType_PRIVATE type
2764        - PetscInt        size_of_l2gmap
2765        - PetscInt        global_row_indices[size_of_l2gmap]
2766        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2767     */
2768   } else {
2769     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2770     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2771     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2772     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2773     send_buffer_idxs[1] = i;
2774     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2775     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2776     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2777     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2778     for (i=0;i<n_sends;i++) {
2779       ilengths_vals[is_indices[i]] = len*len;
2780       ilengths_idxs[is_indices[i]] = len+2;
2781     }
2782   }
2783   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2784   buf_size_idxs = 0;
2785   buf_size_vals = 0;
2786   for (i=0;i<n_recvs;i++) {
2787     buf_size_idxs += (PetscInt)olengths_idxs[i];
2788     buf_size_vals += (PetscInt)olengths_vals[i];
2789   }
2790   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2791   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2792 
2793   /* get new tags for clean communications */
2794   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2795   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2796 
2797   /* allocate for requests */
2798   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2799   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2800   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2801   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2802 
2803   /* communications */
2804   ptr_idxs = recv_buffer_idxs;
2805   ptr_vals = recv_buffer_vals;
2806   for (i=0;i<n_recvs;i++) {
2807     source_dest = onodes[i];
2808     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2809     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2810     ptr_idxs += olengths_idxs[i];
2811     ptr_vals += olengths_vals[i];
2812   }
2813   for (i=0;i<n_sends;i++) {
2814     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2815     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2816     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2817   }
2818   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2819   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2820 
2821   /* assemble new l2g map */
2822   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2823   ptr_idxs = recv_buffer_idxs;
2824   buf_size_idxs = 0;
2825   for (i=0;i<n_recvs;i++) {
2826     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2827     ptr_idxs += olengths_idxs[i];
2828   }
2829   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2830   ptr_idxs = recv_buffer_idxs;
2831   buf_size_idxs = 0;
2832   for (i=0;i<n_recvs;i++) {
2833     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2834     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2835     ptr_idxs += olengths_idxs[i];
2836   }
2837   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2838   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2839   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2840 
2841   /* infer new local matrix type from received local matrices type */
2842   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2843   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2844   new_local_type_private = MATAIJ_PRIVATE;
2845   new_local_type = MATSEQAIJ;
2846   if (n_recvs) {
2847     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2848     ptr_idxs = recv_buffer_idxs;
2849     for (i=0;i<n_recvs;i++) {
2850       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2851         new_local_type_private = MATAIJ_PRIVATE;
2852         break;
2853       }
2854       ptr_idxs += olengths_idxs[i];
2855     }
2856     switch (new_local_type_private) {
2857       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2858         new_local_type = MATSEQAIJ;
2859         bs = 1;
2860         break;
2861       case MATAIJ_PRIVATE:
2862         new_local_type = MATSEQAIJ;
2863         bs = 1;
2864         break;
2865       case MATBAIJ_PRIVATE:
2866         new_local_type = MATSEQBAIJ;
2867         break;
2868       case MATSBAIJ_PRIVATE:
2869         new_local_type = MATSEQSBAIJ;
2870         break;
2871       default:
2872         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2873         break;
2874     }
2875   }
2876 
2877   /* create MATIS object */
2878   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2879   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2880   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2881   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2882   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2883   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2884 
2885   /* set values */
2886   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2887   ptr_vals = recv_buffer_vals;
2888   ptr_idxs = recv_buffer_idxs;
2889   for (i=0;i<n_recvs;i++) {
2890     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2891       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2892       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2893       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2894       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2895       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2896     }
2897     ptr_idxs += olengths_idxs[i];
2898     ptr_vals += olengths_vals[i];
2899   }
2900   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2901   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2902   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2903   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904 
2905   { /* check */
2906     Vec       lvec,rvec;
2907     PetscReal infty_error;
2908 
2909     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2910     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2911     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2912     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2913     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2914     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2915     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2916     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2917     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2918   }
2919 
2920   /* free workspace */
2921   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2922   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2923   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2924   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2925   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2926   if (isdense) {
2927     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2928     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2929   } else {
2930     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2931   }
2932   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2933   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2934   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2935   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2936   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2937   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2938   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2939   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2940   ierr = PetscFree(onodes);CHKERRQ(ierr);
2941   /* get back new mat */
2942   *mat_n = new_mat;
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 #undef __FUNCT__
2947 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2948 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2949 {
2950   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2951   PC_IS                  *pcis = (PC_IS*)pc->data;
2952   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2953   MatNullSpace           CoarseNullSpace=NULL;
2954   ISLocalToGlobalMapping coarse_islg;
2955   IS                     coarse_is;
2956   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2957   PetscInt               im_active=-1,active_procs=-1;
2958   PC                     pc_temp;
2959   PCType                 coarse_pc_type;
2960   KSPType                coarse_ksp_type;
2961   PetscBool              multilevel_requested,multilevel_allowed;
2962   PetscBool              setsym,issym,isbddc,isnn;
2963   MatStructure           matstruct;
2964   PetscErrorCode         ierr;
2965 
2966   PetscFunctionBegin;
2967   /* Assign global numbering to coarse dofs */
2968   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2969 
2970   /* infer some info from user */
2971   issym = PETSC_FALSE;
2972   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2973   multilevel_allowed = PETSC_FALSE;
2974   multilevel_requested = PETSC_FALSE;
2975   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2976   if (multilevel_requested) {
2977     /* count "active processes" */
2978     im_active = !!(pcis->n);
2979     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2980     if (active_procs/pcbddc->coarsening_ratio < 2) {
2981       multilevel_allowed = PETSC_FALSE;
2982     } else {
2983       multilevel_allowed = PETSC_TRUE;
2984     }
2985   }
2986 
2987   /* set defaults for coarse KSP and PC */
2988   if (multilevel_allowed) {
2989     if (issym) {
2990       coarse_ksp_type = KSPRICHARDSON;
2991     } else {
2992       coarse_ksp_type = KSPCHEBYSHEV;
2993     }
2994     coarse_pc_type = PCBDDC;
2995   } else {
2996     coarse_ksp_type = KSPPREONLY;
2997     coarse_pc_type = PCREDUNDANT;
2998   }
2999 
3000   /* create the coarse KSP object only once with defaults */
3001   if (!pcbddc->coarse_ksp) {
3002     char prefix[256],str_level[3];
3003     size_t len;
3004     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3005     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3006     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3007     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3008     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3009     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3010     /* prefix */
3011     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3012     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3013     if (!pcbddc->current_level) {
3014       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3015       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3016     } else {
3017       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3018       if (pcbddc->current_level>1) len -= 2;
3019       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3020       *(prefix+len)='\0';
3021       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3022       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3023     }
3024     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3025   }
3026   /* allow user customization */
3027   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3028   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3029   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3030 
3031   /* get some info after set from options */
3032   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3033   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3034   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3035   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3036     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3037     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3038     isbddc = PETSC_FALSE;
3039   }
3040 
3041   /* propagate BDDC info to the next level */
3042   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3043   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3044   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3045 
3046   /* creates temporary MATIS object for coarse matrix */
3047   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3048   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3049   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3050   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3051   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3052   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3055   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3056   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3057 
3058   /* assemble coarse matrix */
3059   if (isbddc || isnn) {
3060     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3061   } else {
3062     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3063   }
3064   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3065 
3066   /* create local to global scatters for coarse problem */
3067   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3068   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3069   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3070 
3071   /* propagate symmetry info to coarse matrix */
3072   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3073 
3074   /* Compute coarse null space (special handling by BDDC only) */
3075   if (pcbddc->NullSpace) {
3076     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3077     if (isbddc) {
3078       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3079     } else {
3080       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3081     }
3082   }
3083 
3084   /* set operators */
3085   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3086   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3087 
3088   /* additional KSP customization */
3089   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3090   if (max_it < 5) {
3091     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3092   }
3093   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3094 
3095 
3096   /* print some info if requested */
3097   if (pcbddc->dbg_flag) {
3098     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3099     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3100     if (!multilevel_allowed) {
3101       if (multilevel_requested) {
3102         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3103       } else if (pcbddc->max_levels) {
3104         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3105       }
3106     }
3107     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3108     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3109   }
3110 
3111   /* setup coarse ksp */
3112   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3113   if (pcbddc->dbg_flag) {
3114     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3115     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3116     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3117     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3118   }
3119 
3120   /* Check coarse problem if requested */
3121   if (pcbddc->dbg_flag) {
3122     KSP       check_ksp;
3123     KSPType   check_ksp_type;
3124     PC        check_pc;
3125     Vec       check_vec;
3126     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3127     PetscInt  its;
3128     PetscBool ispreonly,compute;
3129 
3130     /* Create ksp object suitable for estimation of extreme eigenvalues */
3131     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3132     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3133     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3134     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3135     if (ispreonly) {
3136       check_ksp_type = KSPPREONLY;
3137       compute = PETSC_FALSE;
3138     } else {
3139       if (issym) check_ksp_type = KSPCG;
3140       else check_ksp_type = KSPGMRES;
3141       compute = PETSC_TRUE;
3142     }
3143     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3144     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3145     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3146     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3147     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3148     /* create random vec */
3149     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3150     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3151     if (CoarseNullSpace) {
3152       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3153     }
3154     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3155     /* solve coarse problem */
3156     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3157     if (CoarseNullSpace) {
3158       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3159     }
3160     /* check coarse problem residual error */
3161     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3162     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3163     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3164     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3165     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3166     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3167     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3168     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3169     /* get eigenvalue estimation if preonly has not been requested */
3170     if (!ispreonly) {
3171       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3172       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3173       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3174     }
3175     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3176     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3177   }
3178   /* free memory */
3179   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3180   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 #undef __FUNCT__
3185 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3186 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3187 {
3188   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3189   PC_IS*         pcis = (PC_IS*)pc->data;
3190   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3191   PetscInt       i,j,coarse_size;
3192   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3193   PetscErrorCode ierr;
3194 
3195   PetscFunctionBegin;
3196   /* get indices in local ordering for vertices and constraints */
3197   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3198   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3199   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3200   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3201   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3202   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3203   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3204 
3205   /* Compute global number of coarse dofs */
3206   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3207 
3208   /* check numbering */
3209   if (pcbddc->dbg_flag) {
3210     PetscScalar coarsesum,*array;
3211 
3212     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3213     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3214     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3215     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3216     for (i=0;i<pcbddc->local_primal_size;i++) {
3217       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3218     }
3219     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3220     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3221     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3222     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3223     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3224     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3225     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3226     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3227     for (i=0;i<pcis->n;i++) {
3228       if (array[i] == 1.0) {
3229         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3230       }
3231     }
3232     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3233     for (i=0;i<pcis->n;i++) {
3234       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3235     }
3236     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3237     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3238     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3239     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3240     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3241     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3242     if (pcbddc->dbg_flag > 1) {
3243       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3244       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3245       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3246       for (i=0;i<pcbddc->local_primal_size;i++) {
3247         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3248       }
3249       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3250     }
3251     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3252   }
3253   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3254   /* get back data */
3255   *coarse_size_n = coarse_size;
3256   *local_primal_indices_n = local_primal_indices;
3257   PetscFunctionReturn(0);
3258 }
3259 
3260