xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 93bd9ae7ebd6e32f9ba30cca0adcbe3c21787c25)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
166   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183 
184   /* Allocating some extra storage just to be safe */
185   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
186   for (i=0;i<pcis->n;i++) auxindices[i]=i;
187 
188   /* vertices in boundary numbering */
189   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
191   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
193   if (i != n_vertices) {
194     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
195   }
196 
197   /* Precompute stuffs needed for preprocessing and application of BDDC*/
198   if (n_constraints) {
199     /* work vectors */
200     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
201     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
202     /* auxiliary matrices */
203     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
204     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
205     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
206     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
207 
208     /* Extract constraints on R nodes: C_{CR}  */
209     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
210     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
211     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
212 
213     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
214     for (i=0;i<n_constraints;i++) {
215       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
216       /* Get row of constraint matrix in R numbering */
217       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
219       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
221       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
222       /* Solve for row of constraint matrix in R numbering */
223       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
224       /* Set values in local_auxmat2 */
225       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
227       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228     }
229     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
232 
233     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
234     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
235     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
236     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
237     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
238     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
239     ierr = MatSetUp(M1);CHKERRQ(ierr);
240     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
241     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
242     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
243     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
244     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
245     ierr = MatDestroy(&M2);CHKERRQ(ierr);
246     ierr = MatDestroy(&M3);CHKERRQ(ierr);
247     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
248     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
249   }
250 
251   /* Get submatrices from subdomain matrix */
252   if (n_vertices) {
253     PetscInt ibs,mbs;
254     PetscBool issbaij;
255     Mat newmat;
256 
257     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
258     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
259     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
260     if (ibs != mbs) { /* need to convert to SEQAIJ */
261       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
263       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
265       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
266     } else {
267       /* this is safe */
268       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
269       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
270       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
271         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272         /* which of the two approaches is faster? */
273         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
275         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
277         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
278       } else {
279         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
280         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
281       }
282     }
283     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
284     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
285     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
286   }
287 
288   /* Matrix of coarse basis functions (local) */
289   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
290   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
291   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
292   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
293   if (pcbddc->switch_static || pcbddc->dbg_flag) {
294     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
295     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
296     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
297     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
298   }
299 
300   if (pcbddc->dbg_flag) {
301     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
303     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
304   }
305   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
306   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
307 
308   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
309 
310   /* vertices */
311   for (i=0;i<n_vertices;i++) {
312     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
313     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
314     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
315     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
317     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
318     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
319     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
320     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
321     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
322     if (n_constraints) {
323       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
324       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
325       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
326     }
327     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
328     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
329 
330     /* Set values in coarse basis function and subdomain part of coarse_mat */
331     /* coarse basis functions */
332     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
333     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
335     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
337     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
339     if (pcbddc->switch_static || pcbddc->dbg_flag) {
340       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
341       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
342       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345     }
346     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
347     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
349     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     if (n_constraints) {
351       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354     }
355 
356     /* check */
357     if (pcbddc->dbg_flag) {
358       /* assemble subdomain vector on local nodes */
359       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
365       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
366       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
367       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
368       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
370       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       if (n_constraints) {
372         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
373         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
374         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
375       }
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
379       /* check saddle point solution */
380       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
381       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
382       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
383       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
384       /* shift by the identity matrix */
385       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
386       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
387       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   /* constraints */
393   for (i=0;i<n_constraints;i++) {
394     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
395     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
397     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
398     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
399     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
400     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
401     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
402     if (n_vertices) {
403       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
404     }
405     /* Set values in coarse basis function and subdomain part of coarse_mat */
406     /* coarse basis functions */
407     j = i+n_vertices; /* don't touch this! */
408     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
409     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
410     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
411     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     if (pcbddc->switch_static || pcbddc->dbg_flag) {
415       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420     }
421     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
422     if (n_vertices) {
423       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     }
427     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
428     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
429     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
430 
431     if (pcbddc->dbg_flag) {
432       /* assemble subdomain vector on nodes */
433       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
434       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
439       /* assemble subdomain vector of lagrange multipliers */
440       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
441       if (n_vertices) {
442         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
443         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
444         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
445       }
446       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
448       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
452       /* check saddle point solution */
453       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
454       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
455       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
456       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
457       /* shift by the identity matrix */
458       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
459       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
460       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
462     }
463   }
464   /* call assembling routines for local coarse basis */
465   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   if (pcbddc->switch_static || pcbddc->dbg_flag) {
468     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471 
472   /* compute other basis functions for non-symmetric problems */
473   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
474   if (!setsym || (setsym && !issym)) {
475     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
477     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
478     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
481       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
482       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
483       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
484     }
485     for (i=0;i<pcbddc->local_primal_size;i++) {
486       if (n_constraints) {
487         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
488         for (j=0;j<n_constraints;j++) {
489           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
490         }
491         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
492         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
493       }
494       if (i<n_vertices) {
495         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
496         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
497         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
498         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
499         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
500         if (n_constraints) {
501           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
502         }
503       } else {
504         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
505       }
506       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       if (i<n_vertices) {
514         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
515       }
516       if (pcbddc->switch_static || pcbddc->dbg_flag) {
517         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522       }
523 
524       if (pcbddc->dbg_flag) {
525         /* assemble subdomain vector on nodes */
526         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
527         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
529         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         if (i<n_vertices) {
531           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
532         }
533         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
534         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
535         /* assemble subdomain vector of lagrange multipliers */
536         for (j=0;j<pcbddc->local_primal_size;j++) {
537           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
538         }
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         /* check saddle point solution */
542         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
543         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
544         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
545         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
546         /* shift by the identity matrix */
547         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
548         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
549         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
550         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
551       }
552     }
553     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     if (pcbddc->switch_static || pcbddc->dbg_flag) {
556       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     }
559     unsymmetric_check = PETSC_TRUE;
560   } else { /* take references to already computed coarse basis */
561     unsymmetric_check = PETSC_FALSE;
562     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
563     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
564     if (pcbddc->coarse_phi_D) {
565       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
566       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
567     }
568   }
569   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
570   /* Checking coarse_sub_mat and coarse basis functios */
571   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
572   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
573   if (pcbddc->dbg_flag) {
574     Mat         coarse_sub_mat;
575     Mat         AUXMAT,TM1,TM2,TM3,TM4;
576     Mat         coarse_phi_D,coarse_phi_B;
577     Mat         coarse_psi_D,coarse_psi_B;
578     Mat         A_II,A_BB,A_IB,A_BI;
579     MatType     checkmattype=MATSEQAIJ;
580     PetscReal   real_value;
581 
582     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
583     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
584     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
585     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
586     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
587     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
588     if (unsymmetric_check) {
589       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
590       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
591     }
592     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
593 
594     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
596     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
597     if (unsymmetric_check) {
598       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
602       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
608       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
610     } else {
611       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
612       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
613       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
614       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
615       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
616       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
617       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
618       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
619     }
620     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
621     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
622     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
623     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
624     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
625     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
626     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
627     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
628     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
629     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
630     for (i=0;i<pcbddc->local_primal_size;i++) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
632     }
633     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
634     for (i=0;i<pcbddc->local_primal_size;i++) {
635       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
636     }
637     if (unsymmetric_check) {
638       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
639       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
640         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
641       }
642       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
643       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
644         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
645       }
646     }
647     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
648     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
649     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
652     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
653     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
654     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
655     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
656     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
657     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
658     if (unsymmetric_check) {
659       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
660       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
661     }
662     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
663     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
664     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
665     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
666   }
667   /* free memory */
668   if (n_vertices) {
669     ierr = PetscFree(vertices);CHKERRQ(ierr);
670     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
671     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
672     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
673     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
674     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
675   }
676   if (n_constraints) {
677     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
678     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
679     ierr = MatDestroy(&M1);CHKERRQ(ierr);
680     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
681   }
682   ierr = PetscFree(auxindices);CHKERRQ(ierr);
683   /* get back data */
684   *coarse_submat_vals_n = coarse_submat_vals;
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
691 {
692   PC_IS*            pcis = (PC_IS*)(pc->data);
693   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
694   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
695   PetscBool         issbaij,isbaij;
696   /* manage repeated solves */
697   MatReuse          reuse;
698   MatStructure      matstruct;
699   PetscErrorCode    ierr;
700 
701   PetscFunctionBegin;
702   /* get mat flags */
703   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
704   reuse = MAT_INITIAL_MATRIX;
705   if (pc->setupcalled) {
706     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
707     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
708     if (matstruct == SAME_NONZERO_PATTERN) {
709       reuse = MAT_REUSE_MATRIX;
710     } else {
711       reuse = MAT_INITIAL_MATRIX;
712     }
713   }
714   if (reuse == MAT_INITIAL_MATRIX) {
715     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
716     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
717     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
718     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
719     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
720   }
721 
722   /* transform local matrices if needed */
723   if (pcbddc->use_change_of_basis) {
724     Mat         change_mat_all;
725     PetscScalar *row_cmat_values;
726     PetscInt    *row_cmat_indices;
727     PetscInt    *nnz,*is_indices,*temp_indices;
728     PetscInt    i,j,k,n_D,n_B;
729 
730     /* Get Non-overlapping dimensions */
731     n_B = pcis->n_B;
732     n_D = pcis->n-n_B;
733 
734     /* compute nonzero structure of change of basis on all local nodes */
735     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
736     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
738     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     k=1;
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
743       nnz[is_indices[i]]=j;
744       if (k < j) k = j;
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
746     }
747     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     /* assemble change of basis matrix on the whole set of local dofs */
749     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
750     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
751     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
752     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
753     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
754     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
755     for (i=0;i<n_D;i++) {
756       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
757     }
758     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
759     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
760     for (i=0;i<n_B;i++) {
761       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
762       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
763       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
764       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
765     }
766     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
768     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
769     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
770     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
771     if (!issbaij && !isbaij) {
772       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
773     } else {
774       Mat work_mat;
775       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
776       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
777       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
778     }
779     /*
780     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
781     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
782     */
783     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
784     ierr = PetscFree(nnz);CHKERRQ(ierr);
785     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
786   } else {
787     /* without change of basis, the local matrix is unchanged */
788     if (!pcbddc->local_mat) {
789       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
790       pcbddc->local_mat = matis->A;
791     }
792   }
793 
794   /* get submatrices */
795   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
796   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
797   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
798   if (!issbaij) {
799     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
800     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
801   } else {
802     Mat newmat;
803     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
804     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
805     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
806     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
814 {
815   PC_IS*         pcis = (PC_IS*)(pc->data);
816   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
817   IS             is_aux1,is_aux2;
818   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
819   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
820   PetscInt       vbs,bs;
821   PetscBT        bitmask;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   /* Set Non-overlapping dimensions */
826   n_B = pcis->n_B; n_D = pcis->n - n_B;
827   /* get vertex indices from constraint matrix */
828   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
829 
830   /* create auxiliary bitmask */
831   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
832   for (i=0;i<n_vertices;i++) {
833     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
834   }
835 
836   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
837   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
838   for (i=0, n_R=0; i<pcis->n; i++) {
839     if (!PetscBTLookup(bitmask,i)) {
840       idx_R_local[n_R] = i;
841       n_R++;
842     }
843   }
844 
845   /* Block code */
846   vbs = 1;
847   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
848   if (bs>1 && !(n_vertices%bs)) {
849     PetscBool is_blocked = PETSC_TRUE;
850     PetscInt  *vary;
851     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
852     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
853     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
854     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
855     for (i=0; i<n_vertices; i++) {
856       if (vary[i]!=0 && vary[i]!=bs) {
857         is_blocked = PETSC_FALSE;
858         break;
859       }
860     }
861     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
862       vbs = bs;
863       for (i=0;i<n_R/vbs;i++) {
864         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
865       }
866     }
867     ierr = PetscFree(vary);CHKERRQ(ierr);
868   }
869   ierr = PetscFree(vertices);CHKERRQ(ierr);
870   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
871   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
872 
873   /* print some info if requested */
874   if (pcbddc->dbg_flag) {
875     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
876     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
877     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
878     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
879     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
880     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
881     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
882   }
883 
884   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
885   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
886   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
887   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
888   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
889   for (i=0; i<n_D; i++) {
890     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
891   }
892   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
893   for (i=0, j=0; i<n_R; i++) {
894     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
895       aux_array1[j++] = i;
896     }
897   }
898   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
899   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
900   for (i=0, j=0; i<n_B; i++) {
901     if (!PetscBTLookup(bitmask,is_indices[i])) {
902       aux_array2[j++] = i;
903     }
904   }
905   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
906   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
907   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
908   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
909   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
910 
911   if (pcbddc->switch_static || pcbddc->dbg_flag) {
912     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
913     for (i=0, j=0; i<n_R; i++) {
914       if (PetscBTLookup(bitmask,idx_R_local[i])) {
915         aux_array1[j++] = i;
916       }
917     }
918     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
919     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
920     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
921   }
922   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
923   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
931 {
932   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
933   PC_IS          *pcis = (PC_IS*)pc->data;
934   PC             pc_temp;
935   Mat            A_RR;
936   Vec            vec1,vec2,vec3;
937   MatStructure   matstruct;
938   PetscScalar    m_one = -1.0;
939   PetscReal      value;
940   PetscInt       n_D,n_R,ibs,mbs;
941   PetscBool      use_exact,use_exact_reduced,issbaij;
942   PetscErrorCode ierr;
943   /* prefixes stuff */
944   char           dir_prefix[256],neu_prefix[256],str_level[3];
945   size_t         len;
946 
947   PetscFunctionBegin;
948   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
949 
950   /* compute prefixes */
951   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
952   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
953   if (!pcbddc->current_level) {
954     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
955     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
956     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
957     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
958   } else {
959     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
960     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
961     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
962     len -= 15; /* remove "pc_bddc_coarse_" */
963     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
964     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
965     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
966     *(dir_prefix+len)='\0';
967     *(neu_prefix+len)='\0';
968     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
969     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
970     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
971     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
972   }
973 
974   /* DIRICHLET PROBLEM */
975   /* Matrix for Dirichlet problem is pcis->A_II */
976   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
977   if (!pcbddc->ksp_D) { /* create object if not yet build */
978     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
979     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
980     /* default */
981     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
982     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
983     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
984     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
985     if (issbaij) {
986       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
987     } else {
988       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
989     }
990     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
991   }
992   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
993   /* Allow user's customization */
994   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
995   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
996   if (!n_D) {
997     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
998     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
999   }
1000   /* Set Up KSP for Dirichlet problem of BDDC */
1001   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1002   /* set ksp_D into pcis data */
1003   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1004   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1005   pcis->ksp_D = pcbddc->ksp_D;
1006 
1007   /* NEUMANN PROBLEM */
1008   /* Matrix for Neumann problem is A_RR -> we need to create it */
1009   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1010   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1011   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1012   if (ibs != mbs) {
1013     Mat newmat;
1014     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1015     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1016     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1017   } else {
1018     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1019   }
1020   if (!pcbddc->ksp_R) { /* create object if not yet build */
1021     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1022     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1023     /* default */
1024     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1025     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1026     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1027     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1028     if (issbaij) {
1029       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1030     } else {
1031       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1032     }
1033     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1034   }
1035   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1036   /* Allow user's customization */
1037   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1038   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1039   if (!n_R) {
1040     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1041     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1042   }
1043   /* Set Up KSP for Neumann problem of BDDC */
1044   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1045 
1046   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1047 
1048   /* Dirichlet */
1049   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1050   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1051   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1052   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1053   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1054   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1055   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1056   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1057   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1058   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1059   /* need to be adapted? */
1060   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1061   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1062   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1063   /* print info */
1064   if (pcbddc->dbg_flag) {
1065     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1066     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1067     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1068     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1069     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1070   }
1071   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1072     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1073   }
1074 
1075   /* Neumann */
1076   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1077   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1078   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1079   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1080   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1081   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1082   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1083   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1084   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1085   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1086   /* need to be adapted? */
1087   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1088   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1089   /* print info */
1090   if (pcbddc->dbg_flag) {
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1092     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1093   }
1094   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1095     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1096   }
1097   /* free Neumann problem's matrix */
1098   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1104 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1105 {
1106   PetscErrorCode ierr;
1107   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1108 
1109   PetscFunctionBegin;
1110   if (applytranspose) {
1111     if (pcbddc->local_auxmat1) {
1112       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1113       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1114     }
1115     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1116   } else {
1117     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1118     if (pcbddc->local_auxmat1) {
1119       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1120       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1121     }
1122   }
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1127 #undef __FUNCT__
1128 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1129 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1130 {
1131   PetscErrorCode ierr;
1132   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1133   PC_IS*            pcis = (PC_IS*)  (pc->data);
1134   const PetscScalar zero = 0.0;
1135 
1136   PetscFunctionBegin;
1137   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1138   if (applytranspose) {
1139     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1140     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1141   } else {
1142     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1143     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1144   }
1145   /* Scatter data of coarse_rhs */
1146   ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr);
1147   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148 
1149   /* Local solution on R nodes */
1150   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1151   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   if (pcbddc->switch_static) {
1154     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156   }
1157   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1158   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1159   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161   if (pcbddc->switch_static) {
1162     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1163     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1164   }
1165 
1166   /* Coarse solution */
1167   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1168   /* TODO remove null space when doing multilevel */
1169   if (applytranspose) {
1170     ierr = KSPSolveTranspose(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1171   } else {
1172     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1173   }
1174   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1175   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1176 
1177   /* Sum contributions from two levels */
1178   if (applytranspose) {
1179     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1180     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1181   } else {
1182     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1183     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1190 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1191 {
1192   PetscErrorCode ierr;
1193   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1194 
1195   PetscFunctionBegin;
1196   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1202 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1203 {
1204   PetscErrorCode ierr;
1205   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1206 
1207   PetscFunctionBegin;
1208   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 /* uncomment for testing purposes */
1213 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1214 #undef __FUNCT__
1215 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1216 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1217 {
1218   PetscErrorCode    ierr;
1219   PC_IS*            pcis = (PC_IS*)(pc->data);
1220   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1221   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1222   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1223   MatType           impMatType=MATSEQAIJ;
1224   /* one and zero */
1225   PetscScalar       one=1.0,zero=0.0;
1226   /* space to store constraints and their local indices */
1227   PetscScalar       *temp_quadrature_constraint;
1228   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1229   /* iterators */
1230   PetscInt          i,j,k,total_counts,temp_start_ptr;
1231   /* stuff to store connected components stored in pcbddc->mat_graph */
1232   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1233   PetscInt          n_ISForFaces,n_ISForEdges;
1234   /* near null space stuff */
1235   MatNullSpace      nearnullsp;
1236   const Vec         *nearnullvecs;
1237   Vec               *localnearnullsp;
1238   PetscBool         nnsp_has_cnst;
1239   PetscInt          nnsp_size;
1240   PetscScalar       *array;
1241   /* BLAS integers */
1242   PetscBLASInt      lwork,lierr;
1243   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1244   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1245   /* LAPACK working arrays for SVD or POD */
1246   PetscBool         skip_lapack;
1247   PetscScalar       *work;
1248   PetscReal         *singular_vals;
1249 #if defined(PETSC_USE_COMPLEX)
1250   PetscReal         *rwork;
1251 #endif
1252 #if defined(PETSC_MISSING_LAPACK_GESVD)
1253   PetscBLASInt      Blas_one_2=1;
1254   PetscScalar       *temp_basis,*correlation_mat;
1255 #else
1256   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1257   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1258 #endif
1259   /* change of basis */
1260   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1261   PetscBool         boolforchange,qr_needed;
1262   PetscBT           touched,change_basis,qr_needed_idx;
1263   /* auxiliary stuff */
1264   PetscInt          *nnz,*is_indices,*local_to_B;
1265   /* some quantities */
1266   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1267   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1268 
1269 
1270   PetscFunctionBegin;
1271   /* Get index sets for faces, edges and vertices from graph */
1272   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1273     pcbddc->use_vertices = PETSC_TRUE;
1274   }
1275   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1276   /* HACK: provide functions to set change of basis */
1277   if (!ISForVertices && pcbddc->NullSpace) {
1278     pcbddc->use_change_of_basis = PETSC_TRUE;
1279     pcbddc->use_change_on_faces = PETSC_FALSE;
1280   }
1281   /* print some info */
1282   if (pcbddc->dbg_flag) {
1283     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1284     i = 0;
1285     if (ISForVertices) {
1286       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1289     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1290     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1291     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1292   }
1293   /* check if near null space is attached to global mat */
1294   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1295   if (nearnullsp) {
1296     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1297   } else { /* if near null space is not provided BDDC uses constants by default */
1298     nnsp_size = 0;
1299     nnsp_has_cnst = PETSC_TRUE;
1300   }
1301   /* get max number of constraints on a single cc */
1302   max_constraints = nnsp_size;
1303   if (nnsp_has_cnst) max_constraints++;
1304 
1305   /*
1306        Evaluate maximum storage size needed by the procedure
1307        - temp_indices will contain start index of each constraint stored as follows
1308        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1309        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1310        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1311                                                                                                                                                          */
1312   total_counts = n_ISForFaces+n_ISForEdges;
1313   total_counts *= max_constraints;
1314   n_vertices = 0;
1315   if (ISForVertices) {
1316     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1317   }
1318   total_counts += n_vertices;
1319   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1320   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1321   total_counts = 0;
1322   max_size_of_constraint = 0;
1323   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1324     if (i<n_ISForEdges) {
1325       used_IS = &ISForEdges[i];
1326     } else {
1327       used_IS = &ISForFaces[i-n_ISForEdges];
1328     }
1329     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1330     total_counts += j;
1331     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1332   }
1333   total_counts *= max_constraints;
1334   total_counts += n_vertices;
1335   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1336   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1337   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1338   /* local to boundary numbering */
1339   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1340   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1342   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1343   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1344   /* get local part of global near null space vectors */
1345   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1346   for (k=0;k<nnsp_size;k++) {
1347     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1348     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1349     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1350   }
1351 
1352   /* whether or not to skip lapack calls */
1353   skip_lapack = PETSC_TRUE;
1354   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1355 
1356   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1357   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1358     PetscScalar temp_work;
1359 #if defined(PETSC_MISSING_LAPACK_GESVD)
1360     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1361     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1362     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1363     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1364 #if defined(PETSC_USE_COMPLEX)
1365     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1366 #endif
1367     /* now we evaluate the optimal workspace using query with lwork=-1 */
1368     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1369     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1370     lwork = -1;
1371     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1372 #if !defined(PETSC_USE_COMPLEX)
1373     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1374 #else
1375     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1376 #endif
1377     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1378     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1379 #else /* on missing GESVD */
1380     /* SVD */
1381     PetscInt max_n,min_n;
1382     max_n = max_size_of_constraint;
1383     min_n = max_constraints;
1384     if (max_size_of_constraint < max_constraints) {
1385       min_n = max_size_of_constraint;
1386       max_n = max_constraints;
1387     }
1388     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1389 #if defined(PETSC_USE_COMPLEX)
1390     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1391 #endif
1392     /* now we evaluate the optimal workspace using query with lwork=-1 */
1393     lwork = -1;
1394     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1395     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1396     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1397     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1398 #if !defined(PETSC_USE_COMPLEX)
1399     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1400 #else
1401     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1402 #endif
1403     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1404     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1405 #endif /* on missing GESVD */
1406     /* Allocate optimal workspace */
1407     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1408     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1409   }
1410   /* Now we can loop on constraining sets */
1411   total_counts = 0;
1412   temp_indices[0] = 0;
1413   /* vertices */
1414   if (ISForVertices) {
1415     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1416     if (nnsp_has_cnst) { /* consider all vertices */
1417       for (i=0;i<n_vertices;i++) {
1418         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1419         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1420         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1421         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1422         total_counts++;
1423       }
1424     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1425       PetscBool used_vertex;
1426       for (i=0;i<n_vertices;i++) {
1427         used_vertex = PETSC_FALSE;
1428         k = 0;
1429         while (!used_vertex && k<nnsp_size) {
1430           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1431           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1432             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1433             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1434             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1435             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1436             total_counts++;
1437             used_vertex = PETSC_TRUE;
1438           }
1439           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1440           k++;
1441         }
1442       }
1443     }
1444     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1445     n_vertices = total_counts;
1446   }
1447 
1448   /* edges and faces */
1449   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1450     if (i<n_ISForEdges) {
1451       used_IS = &ISForEdges[i];
1452       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1453     } else {
1454       used_IS = &ISForFaces[i-n_ISForEdges];
1455       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1456     }
1457     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1458     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1459     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1460     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1461     /* change of basis should not be performed on local periodic nodes */
1462     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1463     if (nnsp_has_cnst) {
1464       PetscScalar quad_value;
1465       temp_constraints++;
1466       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1467       for (j=0;j<size_of_constraint;j++) {
1468         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1469         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1470         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1471       }
1472       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1473       total_counts++;
1474     }
1475     for (k=0;k<nnsp_size;k++) {
1476       PetscReal real_value;
1477       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1478       for (j=0;j<size_of_constraint;j++) {
1479         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1480         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1481         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1482       }
1483       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1484       /* check if array is null on the connected component */
1485       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1486       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1487       if (real_value > 0.0) { /* keep indices and values */
1488         temp_constraints++;
1489         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1490         total_counts++;
1491       }
1492     }
1493     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1494     valid_constraints = temp_constraints;
1495     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1496     if (!pcbddc->use_nnsp_true && temp_constraints) {
1497       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1498 
1499 #if defined(PETSC_MISSING_LAPACK_GESVD)
1500       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1501          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1502          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1503             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1504             from that computed using LAPACKgesvd
1505          -> This is due to a different computation of eigenvectors in LAPACKheev
1506          -> The quality of the POD-computed basis will be the same */
1507       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1508       /* Store upper triangular part of correlation matrix */
1509       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1510       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1511       for (j=0;j<temp_constraints;j++) {
1512         for (k=0;k<j+1;k++) {
1513           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1514         }
1515       }
1516       /* compute eigenvalues and eigenvectors of correlation matrix */
1517       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1518       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1519 #if !defined(PETSC_USE_COMPLEX)
1520       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1521 #else
1522       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1523 #endif
1524       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1525       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1526       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1527       j=0;
1528       while (j < temp_constraints && singular_vals[j] < tol) j++;
1529       total_counts=total_counts-j;
1530       valid_constraints = temp_constraints-j;
1531       /* scale and copy POD basis into used quadrature memory */
1532       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1533       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1534       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1535       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1536       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1537       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1538       if (j<temp_constraints) {
1539         PetscInt ii;
1540         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1541         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1542         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1543         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1544         for (k=0;k<temp_constraints-j;k++) {
1545           for (ii=0;ii<size_of_constraint;ii++) {
1546             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1547           }
1548         }
1549       }
1550 #else  /* on missing GESVD */
1551       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1552       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1553       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1554       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1555 #if !defined(PETSC_USE_COMPLEX)
1556       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1557 #else
1558       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1559 #endif
1560       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1561       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1562       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1563       k = temp_constraints;
1564       if (k > size_of_constraint) k = size_of_constraint;
1565       j = 0;
1566       while (j < k && singular_vals[k-j-1] < tol) j++;
1567       total_counts = total_counts-temp_constraints+k-j;
1568       valid_constraints = k-j;
1569 #endif /* on missing GESVD */
1570     }
1571     /* setting change_of_basis flag is safe now */
1572     if (boolforchange) {
1573       for (j=0;j<valid_constraints;j++) {
1574         PetscBTSet(change_basis,total_counts-j-1);
1575       }
1576     }
1577   }
1578   /* free index sets of faces, edges and vertices */
1579   for (i=0;i<n_ISForFaces;i++) {
1580     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1581   }
1582   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1583   for (i=0;i<n_ISForEdges;i++) {
1584     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1585   }
1586   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1587   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1588 
1589   /* free workspace */
1590   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1591     ierr = PetscFree(work);CHKERRQ(ierr);
1592 #if defined(PETSC_USE_COMPLEX)
1593     ierr = PetscFree(rwork);CHKERRQ(ierr);
1594 #endif
1595     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1596 #if defined(PETSC_MISSING_LAPACK_GESVD)
1597     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1598     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1599 #endif
1600   }
1601   for (k=0;k<nnsp_size;k++) {
1602     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1603   }
1604   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1605 
1606   /* set quantities in pcbddc data structure */
1607   /* n_vertices defines the number of subdomain corners in the primal space */
1608   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1609   pcbddc->local_primal_size = total_counts;
1610   pcbddc->n_vertices = n_vertices;
1611   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1612 
1613   /* Create constraint matrix */
1614   /* The constraint matrix is used to compute the l2g map of primal dofs */
1615   /* so we need to set it up properly either with or without change of basis */
1616   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1617   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1618   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1619   /* array to compute a local numbering of constraints : vertices first then constraints */
1620   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1621   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1622   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1623   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1624   /* auxiliary stuff for basis change */
1625   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1626   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1627 
1628   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1629   total_primal_vertices=0;
1630   for (i=0;i<pcbddc->local_primal_size;i++) {
1631     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1632     if (size_of_constraint == 1) {
1633       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1634       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1635       aux_primal_minloc[total_primal_vertices]=0;
1636       total_primal_vertices++;
1637     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1638       PetscInt min_loc,min_index;
1639       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1640       /* find first untouched local node */
1641       k = 0;
1642       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1643       min_index = global_indices[k];
1644       min_loc = k;
1645       /* search the minimum among global nodes already untouched on the cc */
1646       for (k=1;k<size_of_constraint;k++) {
1647         /* there can be more than one constraint on a single connected component */
1648         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1649           min_index = global_indices[k];
1650           min_loc = k;
1651         }
1652       }
1653       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1654       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1655       aux_primal_minloc[total_primal_vertices]=min_loc;
1656       total_primal_vertices++;
1657     }
1658   }
1659   /* determine if a QR strategy is needed for change of basis */
1660   qr_needed = PETSC_FALSE;
1661   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1662   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1663     if (PetscBTLookup(change_basis,i)) {
1664       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1665       j = 0;
1666       for (k=0;k<size_of_constraint;k++) {
1667         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1668           j++;
1669         }
1670       }
1671       /* found more than one primal dof on the cc */
1672       if (j > 1) {
1673         PetscBTSet(qr_needed_idx,i);
1674         qr_needed = PETSC_TRUE;
1675       }
1676     }
1677   }
1678   /* free workspace */
1679   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1680 
1681   /* permute indices in order to have a sorted set of vertices */
1682   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1683 
1684   /* nonzero structure of constraint matrix */
1685   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1686   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1687   j=total_primal_vertices;
1688   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1689     if (!PetscBTLookup(change_basis,i)) {
1690       nnz[j]=temp_indices[i+1]-temp_indices[i];
1691       j++;
1692     }
1693   }
1694   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1695   ierr = PetscFree(nnz);CHKERRQ(ierr);
1696   /* set values in constraint matrix */
1697   for (i=0;i<total_primal_vertices;i++) {
1698     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1699   }
1700   total_counts = total_primal_vertices;
1701   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1702     if (!PetscBTLookup(change_basis,i)) {
1703       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1704       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1705       total_counts++;
1706     }
1707   }
1708   /* assembling */
1709   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1710   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1711   /*
1712   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1713   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1714   */
1715   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1716   if (pcbddc->use_change_of_basis) {
1717     /* dual and primal dofs on a single cc */
1718     PetscInt     dual_dofs,primal_dofs;
1719     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1720     PetscInt     primal_counter;
1721     /* working stuff for GEQRF */
1722     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1723     PetscBLASInt lqr_work;
1724     /* working stuff for UNGQR */
1725     PetscScalar  *gqr_work,lgqr_work_t;
1726     PetscBLASInt lgqr_work;
1727     /* working stuff for TRTRS */
1728     PetscScalar  *trs_rhs;
1729     PetscBLASInt Blas_NRHS;
1730     /* pointers for values insertion into change of basis matrix */
1731     PetscInt     *start_rows,*start_cols;
1732     PetscScalar  *start_vals;
1733     /* working stuff for values insertion */
1734     PetscBT      is_primal;
1735 
1736     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1737     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1738     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1739     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1740     /* work arrays */
1741     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1742     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1743     /* nonzeros per row */
1744     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1745       if (PetscBTLookup(change_basis,i)) {
1746         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1747         if (PetscBTLookup(qr_needed_idx,i)) {
1748           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1749         } else {
1750           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1751           /* get local primal index on the cc */
1752           j = 0;
1753           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1754           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1755         }
1756       }
1757     }
1758     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1759     ierr = PetscFree(nnz);CHKERRQ(ierr);
1760     /* Set initial identity in the matrix */
1761     for (i=0;i<pcis->n_B;i++) {
1762       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1763     }
1764 
1765     if (pcbddc->dbg_flag) {
1766       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1767       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1768     }
1769 
1770 
1771     /* Now we loop on the constraints which need a change of basis */
1772     /*
1773        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1774        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1775 
1776        Basic blocks of change of basis matrix T computed
1777 
1778           - Using the following block transformation if there is only a primal dof on the cc
1779             (in the example, primal dof is the last one of the edge in LOCAL ordering
1780              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1781             | 1        0   ...        0     1 |
1782             | 0        1   ...        0     1 |
1783             |              ...                |
1784             | 0        ...            1     1 |
1785             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1786 
1787           - via QR decomposition of constraints otherwise
1788     */
1789     if (qr_needed) {
1790       /* space to store Q */
1791       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1792       /* first we issue queries for optimal work */
1793       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1794       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1795       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1796       lqr_work = -1;
1797       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1798       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1799       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1800       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1801       lgqr_work = -1;
1802       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1803       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1804       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1805       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1806       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1807       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1808       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1809       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1810       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1811       /* array to store scaling factors for reflectors */
1812       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1813       /* array to store rhs and solution of triangular solver */
1814       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1815       /* allocating workspace for check */
1816       if (pcbddc->dbg_flag) {
1817         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1818       }
1819     }
1820     /* array to store whether a node is primal or not */
1821     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1822     for (i=0;i<total_primal_vertices;i++) {
1823       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1824     }
1825 
1826     /* loop on constraints and see whether or not they need a change of basis and compute it */
1827     /* -> using implicit ordering contained in temp_indices data */
1828     total_counts = pcbddc->n_vertices;
1829     primal_counter = total_counts;
1830     while (total_counts<pcbddc->local_primal_size) {
1831       primal_dofs = 1;
1832       if (PetscBTLookup(change_basis,total_counts)) {
1833         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1834         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1835           primal_dofs++;
1836         }
1837         /* get constraint info */
1838         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1839         dual_dofs = size_of_constraint-primal_dofs;
1840 
1841         if (pcbddc->dbg_flag) {
1842           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1843         }
1844 
1845         if (primal_dofs > 1) { /* QR */
1846 
1847           /* copy quadrature constraints for change of basis check */
1848           if (pcbddc->dbg_flag) {
1849             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1850           }
1851           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1852           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1853 
1854           /* compute QR decomposition of constraints */
1855           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1856           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1857           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1858           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1859           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1860           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1861           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1862 
1863           /* explictly compute R^-T */
1864           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1865           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1866           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1868           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1869           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1870           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1871           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1872           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1873           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1874 
1875           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1876           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1877           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1878           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1879           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1880           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1881           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1882           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1883           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1884 
1885           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1886              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1887              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1888           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1889           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1890           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1891           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1892           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1893           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1894           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1895           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1896           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1897           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1898 
1899           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1900           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1901           /* insert cols for primal dofs */
1902           for (j=0;j<primal_dofs;j++) {
1903             start_vals = &qr_basis[j*size_of_constraint];
1904             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1905             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1906           }
1907           /* insert cols for dual dofs */
1908           for (j=0,k=0;j<dual_dofs;k++) {
1909             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1910               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1911               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1912               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1913               j++;
1914             }
1915           }
1916 
1917           /* check change of basis */
1918           if (pcbddc->dbg_flag) {
1919             PetscInt   ii,jj;
1920             PetscBool valid_qr=PETSC_TRUE;
1921             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1922             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1923             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1924             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1925             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1926             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1927             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1928             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1929             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1930             for (jj=0;jj<size_of_constraint;jj++) {
1931               for (ii=0;ii<primal_dofs;ii++) {
1932                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1933                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1934               }
1935             }
1936             if (!valid_qr) {
1937               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1938               for (jj=0;jj<size_of_constraint;jj++) {
1939                 for (ii=0;ii<primal_dofs;ii++) {
1940                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1941                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1942                   }
1943                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1944                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1945                   }
1946                 }
1947               }
1948             } else {
1949               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1950             }
1951           }
1952         } else { /* simple transformation block */
1953           PetscInt row,col;
1954           PetscScalar val;
1955           for (j=0;j<size_of_constraint;j++) {
1956             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1957             if (!PetscBTLookup(is_primal,row)) {
1958               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1959               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1960               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1961             } else {
1962               for (k=0;k<size_of_constraint;k++) {
1963                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1964                 if (row != col) {
1965                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1966                 } else {
1967                   val = 1.0;
1968                 }
1969                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1970               }
1971             }
1972           }
1973         }
1974         /* increment primal counter */
1975         primal_counter += primal_dofs;
1976       } else {
1977         if (pcbddc->dbg_flag) {
1978           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1979         }
1980       }
1981       /* increment constraint counter total_counts */
1982       total_counts += primal_dofs;
1983     }
1984 
1985     /* free workspace */
1986     if (qr_needed) {
1987       if (pcbddc->dbg_flag) {
1988         ierr = PetscFree(work);CHKERRQ(ierr);
1989       }
1990       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1991       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1992       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1993       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1994       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1995     }
1996     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1997     /* assembling */
1998     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1999     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2000     /*
2001     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2002     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2003     */
2004   }
2005 
2006   /* flush dbg viewer */
2007   if (pcbddc->dbg_flag) {
2008     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2009   }
2010 
2011   /* free workspace */
2012   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2013   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2014   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2015   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2016   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2017   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2018   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2019   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2020   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2021   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2027 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2028 {
2029   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2030   PC_IS       *pcis = (PC_IS*)pc->data;
2031   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2032   PetscInt    bs,ierr,i,vertex_size;
2033   PetscViewer viewer=pcbddc->dbg_viewer;
2034 
2035   PetscFunctionBegin;
2036   /* Init local Graph struct */
2037   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2038 
2039   /* Check validity of the csr graph passed in by the user */
2040   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2041     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2042   }
2043 
2044   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2045   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2046     Mat mat_adj;
2047     const PetscInt *xadj,*adjncy;
2048     PetscBool flg_row=PETSC_TRUE;
2049 
2050     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2051     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2052     if (!flg_row) {
2053       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2054     }
2055     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2056     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2057     if (!flg_row) {
2058       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2059     }
2060     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2061   }
2062 
2063   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2064   vertex_size = 1;
2065   if (!pcbddc->user_provided_isfordofs) {
2066     if (!pcbddc->n_ISForDofs) {
2067       IS *custom_ISForDofs;
2068 
2069       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2070       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2071       for (i=0;i<bs;i++) {
2072         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2073       }
2074       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2075       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2076       /* remove my references to IS objects */
2077       for (i=0;i<bs;i++) {
2078         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2079       }
2080       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2081     }
2082   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2083     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2084   }
2085 
2086   /* Setup of Graph */
2087   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2088 
2089   /* Graph's connected components analysis */
2090   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2091 
2092   /* print some info to stdout */
2093   if (pcbddc->dbg_flag) {
2094     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2095   }
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2101 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2102 {
2103   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2104   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2105   PetscErrorCode ierr;
2106 
2107   PetscFunctionBegin;
2108   n = 0;
2109   vertices = 0;
2110   if (pcbddc->ConstraintMatrix) {
2111     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2112     for (i=0;i<local_primal_size;i++) {
2113       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2114       if (size_of_constraint == 1) n++;
2115       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2116     }
2117     if (vertices_idx) {
2118       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2119       n = 0;
2120       for (i=0;i<local_primal_size;i++) {
2121         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2122         if (size_of_constraint == 1) {
2123           vertices[n++]=row_cmat_indices[0];
2124         }
2125         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2126       }
2127     }
2128   }
2129   *n_vertices = n;
2130   if (vertices_idx) *vertices_idx = vertices;
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2136 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2137 {
2138   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2139   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2140   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2141   PetscBT        touched;
2142   PetscErrorCode ierr;
2143 
2144     /* This function assumes that the number of local constraints per connected component
2145        is not greater than the number of nodes defined for the connected component
2146        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2147   PetscFunctionBegin;
2148   n = 0;
2149   constraints_index = 0;
2150   if (pcbddc->ConstraintMatrix) {
2151     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2152     max_size_of_constraint = 0;
2153     for (i=0;i<local_primal_size;i++) {
2154       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2155       if (size_of_constraint > 1) {
2156         n++;
2157       }
2158       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2159       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2160     }
2161     if (constraints_idx) {
2162       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2163       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2164       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2165       n = 0;
2166       for (i=0;i<local_primal_size;i++) {
2167         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2168         if (size_of_constraint > 1) {
2169           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2170           /* find first untouched local node */
2171           j = 0;
2172           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2173           min_index = row_cmat_global_indices[j];
2174           min_loc = j;
2175           /* search the minimum among nodes not yet touched on the connected component
2176              since there can be more than one constraint on a single cc */
2177           for (j=1;j<size_of_constraint;j++) {
2178             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2179               min_index = row_cmat_global_indices[j];
2180               min_loc = j;
2181             }
2182           }
2183           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2184           constraints_index[n++] = row_cmat_indices[min_loc];
2185         }
2186         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2187       }
2188       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2189       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2190     }
2191   }
2192   *n_constraints = n;
2193   if (constraints_idx) *constraints_idx = constraints_index;
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 /* the next two functions has been adapted from pcis.c */
2198 #undef __FUNCT__
2199 #define __FUNCT__ "PCBDDCApplySchur"
2200 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2201 {
2202   PetscErrorCode ierr;
2203   PC_IS          *pcis = (PC_IS*)(pc->data);
2204 
2205   PetscFunctionBegin;
2206   if (!vec2_B) { vec2_B = v; }
2207   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2208   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2209   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2210   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2211   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 #undef __FUNCT__
2216 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2217 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2218 {
2219   PetscErrorCode ierr;
2220   PC_IS          *pcis = (PC_IS*)(pc->data);
2221 
2222   PetscFunctionBegin;
2223   if (!vec2_B) { vec2_B = v; }
2224   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2225   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2226   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2227   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2228   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "PCBDDCSubsetNumbering"
2234 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2235 {
2236   Vec            local_vec,global_vec;
2237   IS             seqis,paris;
2238   VecScatter     scatter_ctx;
2239   PetscScalar    *array;
2240   PetscInt       *temp_global_dofs;
2241   PetscScalar    globalsum;
2242   PetscInt       i,j,s;
2243   PetscInt       nlocals,first_index,old_index,max_local;
2244   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2245   PetscMPIInt    *dof_sizes,*dof_displs;
2246   PetscBool      first_found;
2247   PetscErrorCode ierr;
2248 
2249   PetscFunctionBegin;
2250   /* mpi buffers */
2251   MPI_Comm_size(comm,&size_prec_comm);
2252   MPI_Comm_rank(comm,&rank_prec_comm);
2253   j = ( !rank_prec_comm ? size_prec_comm : 0);
2254   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2255   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2256   /* get maximum size of subset */
2257   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2258   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2259   max_local = 0;
2260   if (n_local_dofs) {
2261     max_local = temp_global_dofs[0];
2262     for (i=1;i<n_local_dofs;i++) {
2263       if (max_local < temp_global_dofs[i] ) {
2264         max_local = temp_global_dofs[i];
2265       }
2266     }
2267   }
2268   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2269   max_global++;
2270   max_local = 0;
2271   if (n_local_dofs) {
2272     max_local = local_dofs[0];
2273     for (i=1;i<n_local_dofs;i++) {
2274       if (max_local < local_dofs[i] ) {
2275         max_local = local_dofs[i];
2276       }
2277     }
2278   }
2279   max_local++;
2280   /* allocate workspace */
2281   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2282   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2283   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2284   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2285   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2286   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2287   /* create scatter */
2288   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2289   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2290   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2291   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2292   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2293   /* init array */
2294   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2295   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2296   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2297   if (local_dofs_mult) {
2298     for (i=0;i<n_local_dofs;i++) {
2299       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2300     }
2301   } else {
2302     for (i=0;i<n_local_dofs;i++) {
2303       array[local_dofs[i]]=1.0;
2304     }
2305   }
2306   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2307   /* scatter into global vec and get total number of global dofs */
2308   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2309   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2310   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2311   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2312   /* Fill global_vec with cumulative function for global numbering */
2313   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2314   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2315   nlocals = 0;
2316   first_index = -1;
2317   first_found = PETSC_FALSE;
2318   for (i=0;i<s;i++) {
2319     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2320       first_found = PETSC_TRUE;
2321       first_index = i;
2322     }
2323     nlocals += (PetscInt)PetscRealPart(array[i]);
2324   }
2325   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2326   if (!rank_prec_comm) {
2327     dof_displs[0]=0;
2328     for (i=1;i<size_prec_comm;i++) {
2329       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2330     }
2331   }
2332   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2333   if (first_found) {
2334     array[first_index] += (PetscScalar)nlocals;
2335     old_index = first_index;
2336     for (i=first_index+1;i<s;i++) {
2337       if (PetscRealPart(array[i]) > 0.0) {
2338         array[i] += array[old_index];
2339         old_index = i;
2340       }
2341     }
2342   }
2343   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2344   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2345   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2346   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2347   /* get global ordering of local dofs */
2348   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2349   if (local_dofs_mult) {
2350     for (i=0;i<n_local_dofs;i++) {
2351       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2352     }
2353   } else {
2354     for (i=0;i<n_local_dofs;i++) {
2355       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2356     }
2357   }
2358   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2359   /* free workspace */
2360   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2361   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2362   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2363   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2364   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2365   /* return pointer to global ordering of local dofs */
2366   *global_numbering_subset = temp_global_dofs;
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2372 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2373 {
2374   PetscInt       i,j;
2375   PetscScalar    *alphas;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   /* this implements stabilized Gram-Schmidt */
2380   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2381   for (i=0;i<n;i++) {
2382     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2383     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2384     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2385   }
2386   ierr = PetscFree(alphas);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatConvert_IS_AIJ"
2393 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2394 {
2395   Mat new_mat;
2396   Mat_IS *matis = (Mat_IS*)(mat->data);
2397   /* info on mat */
2398   PetscInt bs,rows,cols;
2399   PetscInt lrows,lcols;
2400   PetscInt local_rows,local_cols;
2401   PetscMPIInt nsubdomains;
2402   /* preallocation */
2403   Vec vec_dnz,vec_onz;
2404   PetscScalar *my_dnz,*my_onz,*array;
2405   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2406   PetscInt  index_row,index_col,owner;
2407   PetscInt  *local_indices,*global_indices;
2408   /* work */
2409   PetscInt i,j;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   /* CHECKS */
2414   /* get info from mat */
2415   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2416   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2417   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2419 
2420   /* MAT_INITIAL_MATRIX */
2421   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2422   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2423   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2424   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2425   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2426   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2427 
2428   /*
2429     preallocation
2430   */
2431 
2432   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2433   /*
2434      Some vectors are needed to sum up properly on shared interface dofs.
2435      Note that preallocation is not exact, since it overestimates nonzeros
2436   */
2437 /*
2438   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2439   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2440   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2441   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2442 */
2443   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2444   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2445   /* All processes need to compute entire row ownership */
2446   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2447   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2448   for (i=0;i<nsubdomains;i++) {
2449     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2450       row_ownership[j]=i;
2451     }
2452   }
2453   /* map indices of local mat to global values */
2454   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2455   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2456   for (i=0;i<local_rows;i++) local_indices[i]=i;
2457   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2458   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2459 
2460   /*
2461      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2462      then, they will be summed up properly. This way, preallocation is always sufficient
2463   */
2464   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2465   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2466   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2467   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2468   for (i=0;i<local_rows;i++) {
2469     index_row = global_indices[i];
2470     for (j=i;j<local_rows;j++) {
2471       owner = row_ownership[index_row];
2472       index_col = global_indices[j];
2473       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2474         my_dnz[i] += 1.0;
2475       } else { /* offdiag block */
2476         my_onz[i] += 1.0;
2477       }
2478       /* same as before, interchanging rows and cols */
2479       if (i != j) {
2480         owner = row_ownership[index_col];
2481         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2482           my_dnz[j] += 1.0;
2483         } else {
2484           my_onz[j] += 1.0;
2485         }
2486       }
2487     }
2488   }
2489   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2490   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2491   if (local_rows) { /* multilevel guard */
2492     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2493     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2494   }
2495   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2496   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2497   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2498   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2499   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2500   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2501   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2502 
2503   /* set computed preallocation in dnz and onz */
2504   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2505   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2506   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2507   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2508   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2509   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2510   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2511   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2512 
2513   /* Resize preallocation if overestimated */
2514   for (i=0;i<lrows;i++) {
2515     dnz[i] = PetscMin(dnz[i],lcols);
2516     onz[i] = PetscMin(onz[i],cols-lcols);
2517   }
2518   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2519   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2520 
2521   /*
2522     Set values. Very Basic.
2523   */
2524   for (i=0;i<local_rows;i++) {
2525     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2526     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2527     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2528     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2529     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2530   }
2531   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2532   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2533   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2534 
2535   /* get back mat */
2536   *M = new_mat;
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatISSubassemble_Private"
2542 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2543 {
2544   Mat             subdomain_adj;
2545   IS              new_ranks,ranks_send_to;
2546   MatPartitioning partitioner;
2547   Mat_IS          *matis;
2548   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2549   PetscInt        prank;
2550   PetscMPIInt     size,rank,color;
2551   PetscInt        *xadj,*adjncy,*oldranks;
2552   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2553   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2554   PetscErrorCode  ierr;
2555   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2556   PetscSubcomm    subcomm;
2557 
2558   PetscFunctionBegin;
2559   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2560   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2561   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2562 
2563   /* Get info on mapping */
2564   matis = (Mat_IS*)(mat->data);
2565   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2566   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2567 
2568   /* build local CSR graph of subdomains' connectivity */
2569   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2570   xadj[0] = 0;
2571   xadj[1] = PetscMax(n_neighs-1,0);
2572   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2573   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2574 
2575   if (threshold) {
2576     PetscInt* count,min_threshold;
2577     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2578     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2579     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2580       for (j=0;j<n_shared[i];j++) {
2581         count[shared[i][j]] += 1;
2582       }
2583     }
2584     /* adapt threshold since we dont want to lose significant connections */
2585     min_threshold = n_neighs;
2586     for (i=1;i<n_neighs;i++) {
2587       for (j=0;j<n_shared[i];j++) {
2588         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2589       }
2590     }
2591     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2592     threshold = PetscMax(min_threshold+1,threshold);
2593     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2594     xadj[1] = 0;
2595     for (i=1;i<n_neighs;i++) {
2596       for (j=0;j<n_shared[i];j++) {
2597         if (count[shared[i][j]] < threshold) {
2598           adjncy[xadj[1]] = neighs[i];
2599           adjncy_wgt[xadj[1]] = n_shared[i];
2600           xadj[1]++;
2601           break;
2602         }
2603       }
2604     }
2605     ierr = PetscFree(count);CHKERRQ(ierr);
2606   } else {
2607     if (xadj[1]) {
2608       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2609       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2610     }
2611   }
2612   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2613   if (use_square) {
2614     for (i=0;i<xadj[1];i++) {
2615       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2616     }
2617   }
2618   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2619 
2620   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2621 
2622   /*
2623     Restrict work on active processes only.
2624   */
2625   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2626   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2627   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2628   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2629   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2630   if (color) {
2631     ierr = PetscFree(xadj);CHKERRQ(ierr);
2632     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2633     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2634   } else {
2635     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2636     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2637     prank = rank;
2638     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2639     for (i=0;i<size;i++) {
2640       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2641     }
2642     for (i=0;i<xadj[1];i++) {
2643       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2644     }
2645     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2646     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2647     n_subdomains = (PetscInt)size;
2648     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2649 
2650     /* Partition */
2651     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2652     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2653     if (use_vwgt) {
2654       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2655       v_wgt[0] = local_size;
2656       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2657     }
2658     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2659     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2660     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2661     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2662     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2663 
2664     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2665     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2666     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2667     /* clean up */
2668     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2669     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2670     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2671     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2672   }
2673   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2674 
2675   /* assemble parallel IS for sends */
2676   i = 1;
2677   if (color) i=0;
2678   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2679   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2680 
2681   /* get back IS */
2682   *is_sends = ranks_send_to;
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2687 
2688 #undef __FUNCT__
2689 #define __FUNCT__ "MatISSubassemble"
2690 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2691 {
2692   Mat                    local_mat,new_mat;
2693   Mat_IS                 *matis;
2694   IS                     is_sends_internal;
2695   PetscInt               rows,cols;
2696   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2697   PetscBool              ismatis,isdense;
2698   ISLocalToGlobalMapping l2gmap;
2699   PetscInt*              l2gmap_indices;
2700   const PetscInt*        is_indices;
2701   MatType                new_local_type;
2702   MatTypePrivate         new_local_type_private;
2703   /* buffers */
2704   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2705   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2706   /* MPI */
2707   MPI_Comm               comm;
2708   PetscMPIInt            n_sends,n_recvs,commsize;
2709   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2710   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2711   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2712   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2713   PetscErrorCode         ierr;
2714 
2715   PetscFunctionBegin;
2716   /* checks */
2717   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2718   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2719   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2720   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2721   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2722   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2723   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2724   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2725   PetscValidLogicalCollectiveInt(mat,bs,0);
2726   /* prepare IS for sending if not provided */
2727   if (!is_sends) {
2728     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2729     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2730   } else {
2731     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2732     is_sends_internal = is_sends;
2733   }
2734 
2735   /* get pointer of MATIS data */
2736   matis = (Mat_IS*)mat->data;
2737 
2738   /* get comm */
2739   comm = PetscObjectComm((PetscObject)mat);
2740 
2741   /* compute number of sends */
2742   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2743   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2744 
2745   /* compute number of receives */
2746   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2747   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2748   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2749   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2750   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2751   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2752   ierr = PetscFree(iflags);CHKERRQ(ierr);
2753 
2754   /* prepare send/receive buffers */
2755   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2756   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2757   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2758   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2759 
2760   /* Get data from local mat */
2761   if (!isdense) {
2762     /* TODO: See below some guidelines on how to prepare the local buffers */
2763     /*
2764        send_buffer_vals should contain the raw values of the local matrix
2765        send_buffer_idxs should contain:
2766        - MatType_PRIVATE type
2767        - PetscInt        size_of_l2gmap
2768        - PetscInt        global_row_indices[size_of_l2gmap]
2769        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2770     */
2771   } else {
2772     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2773     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2774     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2775     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2776     send_buffer_idxs[1] = i;
2777     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2778     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2779     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2780     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2781     for (i=0;i<n_sends;i++) {
2782       ilengths_vals[is_indices[i]] = len*len;
2783       ilengths_idxs[is_indices[i]] = len+2;
2784     }
2785   }
2786   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2787   buf_size_idxs = 0;
2788   buf_size_vals = 0;
2789   for (i=0;i<n_recvs;i++) {
2790     buf_size_idxs += (PetscInt)olengths_idxs[i];
2791     buf_size_vals += (PetscInt)olengths_vals[i];
2792   }
2793   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2794   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2795 
2796   /* get new tags for clean communications */
2797   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2798   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2799 
2800   /* allocate for requests */
2801   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2802   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2803   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2804   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2805 
2806   /* communications */
2807   ptr_idxs = recv_buffer_idxs;
2808   ptr_vals = recv_buffer_vals;
2809   for (i=0;i<n_recvs;i++) {
2810     source_dest = onodes[i];
2811     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2812     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2813     ptr_idxs += olengths_idxs[i];
2814     ptr_vals += olengths_vals[i];
2815   }
2816   for (i=0;i<n_sends;i++) {
2817     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2818     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2819     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2820   }
2821   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2822   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2823 
2824   /* assemble new l2g map */
2825   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2826   ptr_idxs = recv_buffer_idxs;
2827   buf_size_idxs = 0;
2828   for (i=0;i<n_recvs;i++) {
2829     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2830     ptr_idxs += olengths_idxs[i];
2831   }
2832   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2833   ptr_idxs = recv_buffer_idxs;
2834   buf_size_idxs = 0;
2835   for (i=0;i<n_recvs;i++) {
2836     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2837     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2838     ptr_idxs += olengths_idxs[i];
2839   }
2840   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2841   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2842   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2843 
2844   /* infer new local matrix type from received local matrices type */
2845   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2846   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2847   new_local_type_private = MATAIJ_PRIVATE;
2848   new_local_type = MATSEQAIJ;
2849   if (n_recvs) {
2850     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2851     ptr_idxs = recv_buffer_idxs;
2852     for (i=0;i<n_recvs;i++) {
2853       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2854         new_local_type_private = MATAIJ_PRIVATE;
2855         break;
2856       }
2857       ptr_idxs += olengths_idxs[i];
2858     }
2859     switch (new_local_type_private) {
2860       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2861         new_local_type = MATSEQAIJ;
2862         bs = 1;
2863         break;
2864       case MATAIJ_PRIVATE:
2865         new_local_type = MATSEQAIJ;
2866         bs = 1;
2867         break;
2868       case MATBAIJ_PRIVATE:
2869         new_local_type = MATSEQBAIJ;
2870         break;
2871       case MATSBAIJ_PRIVATE:
2872         new_local_type = MATSEQSBAIJ;
2873         break;
2874       default:
2875         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2876         break;
2877     }
2878   }
2879 
2880   /* create MATIS object */
2881   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2882   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2883   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2884   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2885   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2886   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2887 
2888   /* set values */
2889   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2890   ptr_vals = recv_buffer_vals;
2891   ptr_idxs = recv_buffer_idxs;
2892   for (i=0;i<n_recvs;i++) {
2893     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2894       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2895       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2896       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2897       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2898       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2899     }
2900     ptr_idxs += olengths_idxs[i];
2901     ptr_vals += olengths_vals[i];
2902   }
2903   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907 
2908   { /* check */
2909     Vec       lvec,rvec;
2910     PetscReal infty_error;
2911 
2912     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2913     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2914     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2915     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2916     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2917     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2918     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2919     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2920     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2921   }
2922 
2923   /* free workspace */
2924   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2925   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2926   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2927   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2928   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2929   if (isdense) {
2930     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2931     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2932   } else {
2933     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2934   }
2935   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2936   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2937   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2938   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2939   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2940   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2941   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2942   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2943   ierr = PetscFree(onodes);CHKERRQ(ierr);
2944   /* get back new mat */
2945   *mat_n = new_mat;
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2951 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2952 {
2953   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2954   PC_IS                  *pcis = (PC_IS*)pc->data;
2955   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2956   MatNullSpace           CoarseNullSpace=NULL;
2957   ISLocalToGlobalMapping coarse_islg;
2958   IS                     coarse_is;
2959   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2960   PetscInt               im_active=-1,active_procs=-1;
2961   PC                     pc_temp;
2962   PCType                 coarse_pc_type;
2963   KSPType                coarse_ksp_type;
2964   PetscBool              multilevel_requested,multilevel_allowed;
2965   PetscBool              setsym,issym,isbddc,isnn;
2966   MatStructure           matstruct;
2967   PetscErrorCode         ierr;
2968 
2969   PetscFunctionBegin;
2970   /* Assign global numbering to coarse dofs */
2971   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2972 
2973   /* infer some info from user */
2974   issym = PETSC_FALSE;
2975   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2976   multilevel_allowed = PETSC_FALSE;
2977   multilevel_requested = PETSC_FALSE;
2978   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2979   if (multilevel_requested) {
2980     /* count "active processes" */
2981     im_active = !!(pcis->n);
2982     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2983     if (active_procs/pcbddc->coarsening_ratio < 2) {
2984       multilevel_allowed = PETSC_FALSE;
2985     } else {
2986       multilevel_allowed = PETSC_TRUE;
2987     }
2988   }
2989 
2990   /* set defaults for coarse KSP and PC */
2991   if (multilevel_allowed) {
2992     if (issym) {
2993       coarse_ksp_type = KSPRICHARDSON;
2994     } else {
2995       coarse_ksp_type = KSPCHEBYSHEV;
2996     }
2997     coarse_pc_type = PCBDDC;
2998   } else {
2999     coarse_ksp_type = KSPPREONLY;
3000     coarse_pc_type = PCREDUNDANT;
3001   }
3002 
3003   /* create the coarse KSP object only once with defaults */
3004   if (!pcbddc->coarse_ksp) {
3005     char prefix[256],str_level[3];
3006     size_t len;
3007     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3008     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3009     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3010     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3011     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3012     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3013     /* prefix */
3014     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3015     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3016     if (!pcbddc->current_level) {
3017       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3018       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3019     } else {
3020       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3021       if (pcbddc->current_level>1) len -= 2;
3022       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3023       *(prefix+len)='\0';
3024       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3025       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3026     }
3027     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3028   }
3029   /* allow user customization */
3030   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3031   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3032   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3033 
3034   /* get some info after set from options */
3035   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3036   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3037   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3038   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3039     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3040     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3041     isbddc = PETSC_FALSE;
3042   }
3043 
3044   /* propagate BDDC info to the next level */
3045   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3046   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3047   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3048 
3049   /* creates temporary MATIS object for coarse matrix */
3050   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3051   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3052   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3053   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3054   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3055   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3058   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3059   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3060 
3061   /* assemble coarse matrix */
3062   if (isbddc || isnn) {
3063     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3064   } else {
3065     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3066   }
3067   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3068 
3069   /* create local to global scatters for coarse problem */
3070   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3071   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3072   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3073 
3074   /* propagate symmetry info to coarse matrix */
3075   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3076 
3077   /* Compute coarse null space (special handling by BDDC only) */
3078   if (pcbddc->NullSpace) {
3079     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3080     if (isbddc) {
3081       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3082     } else {
3083       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3084     }
3085   }
3086 
3087   /* set operators */
3088   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3089   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3090 
3091   /* additional KSP customization */
3092   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3093   if (max_it < 5) {
3094     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3095   }
3096   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3097 
3098 
3099   /* print some info if requested */
3100   if (pcbddc->dbg_flag) {
3101     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3102     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3103     if (!multilevel_allowed) {
3104       if (multilevel_requested) {
3105         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3106       } else if (pcbddc->max_levels) {
3107         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3108       }
3109     }
3110     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3111     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3112   }
3113 
3114   /* setup coarse ksp */
3115   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3116   if (pcbddc->dbg_flag) {
3117     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3118     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3119     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3120     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3121   }
3122 
3123   /* Check coarse problem if requested */
3124   if (pcbddc->dbg_flag) {
3125     KSP       check_ksp;
3126     KSPType   check_ksp_type;
3127     PC        check_pc;
3128     Vec       check_vec;
3129     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3130     PetscInt  its;
3131     PetscBool ispreonly,compute;
3132 
3133     /* Create ksp object suitable for estimation of extreme eigenvalues */
3134     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3135     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3136     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3137     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3138     if (ispreonly) {
3139       check_ksp_type = KSPPREONLY;
3140       compute = PETSC_FALSE;
3141     } else {
3142       if (issym) check_ksp_type = KSPCG;
3143       else check_ksp_type = KSPGMRES;
3144       compute = PETSC_TRUE;
3145     }
3146     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3147     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3148     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3149     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3150     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3151     /* create random vec */
3152     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3153     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3154     if (CoarseNullSpace) {
3155       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3156     }
3157     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3158     /* solve coarse problem */
3159     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3160     if (CoarseNullSpace) {
3161       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3162     }
3163     /* check coarse problem residual error */
3164     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3165     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3166     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3167     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3168     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3169     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3170     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3171     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3172     /* get eigenvalue estimation if preonly has not been requested */
3173     if (!ispreonly) {
3174       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3175       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3176       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3177     }
3178     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3179     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3180   }
3181   /* free memory */
3182   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3183   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3184   PetscFunctionReturn(0);
3185 }
3186 
3187 #undef __FUNCT__
3188 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3189 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3190 {
3191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3192   PC_IS*         pcis = (PC_IS*)pc->data;
3193   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3194   PetscInt       i,j,coarse_size;
3195   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3196   PetscErrorCode ierr;
3197 
3198   PetscFunctionBegin;
3199   /* get indices in local ordering for vertices and constraints */
3200   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3201   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3202   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3203   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3204   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3205   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3206   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3207 
3208   /* Compute global number of coarse dofs */
3209   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3210 
3211   /* check numbering */
3212   if (pcbddc->dbg_flag) {
3213     PetscScalar coarsesum,*array;
3214 
3215     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3216     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3217     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3218     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3219     for (i=0;i<pcbddc->local_primal_size;i++) {
3220       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3221     }
3222     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3223     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3224     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3225     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3226     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3227     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3228     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3229     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3230     for (i=0;i<pcis->n;i++) {
3231       if (array[i] == 1.0) {
3232         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3233       }
3234     }
3235     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3236     for (i=0;i<pcis->n;i++) {
3237       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3238     }
3239     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3240     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3241     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3242     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3243     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3244     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3245     if (pcbddc->dbg_flag > 1) {
3246       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3247       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3248       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3249       for (i=0;i<pcbddc->local_primal_size;i++) {
3250         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3251       }
3252       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3253     }
3254     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3255   }
3256   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3257   /* get back data */
3258   *coarse_size_n = coarse_size;
3259   *local_primal_indices_n = local_primal_indices;
3260   PetscFunctionReturn(0);
3261 }
3262 
3263