xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision df18702065187e6672c72c1017c7d33fa92bcb7d)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "PCBDDCCreateWorkVectors"
110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
111 {
112   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
113   PC_IS          *pcis = (PC_IS*)pc->data;
114   VecType        impVecType;
115   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
120   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
121   local_primal_size = n_constraints+n_vertices;
122   n_R = pcis->n-n_vertices;
123   /* local work vectors */
124   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
125   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
126   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
127   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
128   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
129   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
130   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
131   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
132   if (n_constraints) {
133     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
134     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
135     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
136   }
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "PCBDDCSetUpCorrection"
142 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
143 {
144   PetscErrorCode         ierr;
145   /* pointers to pcis and pcbddc */
146   PC_IS*                 pcis = (PC_IS*)pc->data;
147   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
148   /* submatrices of local problem */
149   Mat                    A_RV,A_VR,A_VV;
150   /* working matrices */
151   Mat                    M1,M2,M3,C_CR;
152   /* working vectors */
153   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
154   /* additional working stuff */
155   IS                     is_aux;
156   ISLocalToGlobalMapping BtoNmap;
157   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
158   const PetscScalar      *array,*row_cmat_values;
159   const PetscInt         *row_cmat_indices,*idx_R_local;
160   PetscInt               *vertices,*idx_V_B,*auxindices;
161   PetscInt               n_vertices,n_constraints,size_of_constraint;
162   PetscInt               i,j,n_R,n_D,n_B;
163   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
164   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
165   MatType                impMatType;
166   /* some shortcuts to scalars */
167   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
168   /* for debugging purposes */
169   PetscReal              *coarsefunctions_errors,*constraints_errors;
170 
171   PetscFunctionBegin;
172   /* get number of vertices and their local indices */
173   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
174   n_constraints = pcbddc->local_primal_size-n_vertices;
175   /* Set Non-overlapping dimensions */
176   n_B = pcis->n_B; n_D = pcis->n - n_B;
177   n_R = pcis->n-n_vertices;
178 
179   /* Set types for local objects needed by BDDC precondtioner */
180   impMatType = MATSEQDENSE;
181 
182   /* Allocating some extra storage just to be safe */
183   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
184   for (i=0;i<pcis->n;i++) auxindices[i]=i;
185 
186   /* vertices in boundary numbering */
187   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
188   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
189   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
191   if (i != n_vertices) {
192     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
193   }
194 
195   /* Precompute stuffs needed for preprocessing and application of BDDC*/
196   if (n_constraints) {
197     /* work vectors */
198     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
199     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
200     /* auxiliary matrices */
201     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
202     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
203     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
204     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
205 
206     /* Extract constraints on R nodes: C_{CR}  */
207     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
208     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
209     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
210 
211     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
212     for (i=0;i<n_constraints;i++) {
213       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
214       /* Get row of constraint matrix in R numbering */
215       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
216       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
217       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
219       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
220       /* Solve for row of constraint matrix in R numbering */
221       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
222       /* Set values in local_auxmat2 */
223       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
224       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
225       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226     }
227     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
228     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
229     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
230 
231     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
232     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
233     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
234     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
235     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
236     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
237     ierr = MatSetUp(M1);CHKERRQ(ierr);
238     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
239     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
240     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
241     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
242     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
243     ierr = MatDestroy(&M2);CHKERRQ(ierr);
244     ierr = MatDestroy(&M3);CHKERRQ(ierr);
245     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
246     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
247   }
248 
249   /* Get submatrices from subdomain matrix */
250   if (n_vertices) {
251     PetscInt ibs,mbs;
252     PetscBool issbaij;
253     Mat newmat;
254 
255     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
256     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
257     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
258     if (ibs != mbs) { /* need to convert to SEQAIJ */
259       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
260       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
261       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
263       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
264     } else {
265       /* this is safe */
266       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
267       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
268       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
269         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
270         /* which of the two approaches is faster? */
271         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
272         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
273         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
275         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
276       } else {
277         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
278         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
279       }
280     }
281     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
282     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
283     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
284   }
285 
286   /* Matrix of coarse basis functions (local) */
287   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
288   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
289   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
290   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
291   if (pcbddc->switch_static || pcbddc->dbg_flag) {
292     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
293     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
294     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
295     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
296   }
297 
298   if (pcbddc->dbg_flag) {
299     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
300     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
301     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
302   }
303   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
304   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
305 
306   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
307 
308   /* vertices */
309   for (i=0;i<n_vertices;i++) {
310     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
311     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
312     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
313     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
314     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
315     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
316     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
317     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
318     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
319     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
320     if (n_constraints) {
321       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
322       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
323       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
324     }
325     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
326     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
327 
328     /* Set values in coarse basis function and subdomain part of coarse_mat */
329     /* coarse basis functions */
330     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
331     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
333     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
334     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
335     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
337     if (pcbddc->switch_static || pcbddc->dbg_flag) {
338       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
339       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
340       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
341       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
342       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343     }
344     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
345     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
346     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
347     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     if (n_constraints) {
349       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
350       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
351       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
352     }
353 
354     /* check */
355     if (pcbddc->dbg_flag) {
356       /* assemble subdomain vector on local nodes */
357       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
358       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
359       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
360       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
363       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
364       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
365       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
366       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
367       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
368       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       if (n_constraints) {
370         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
371         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
372         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
373       }
374       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
375       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
376       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
377       /* check saddle point solution */
378       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
379       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
380       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
381       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
382       /* shift by the identity matrix */
383       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
384       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
385       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
386       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
387     }
388   }
389 
390   /* constraints */
391   for (i=0;i<n_constraints;i++) {
392     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
393     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
394     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
395     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
396     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
397     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
398     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
399     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
400     if (n_vertices) {
401       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
402     }
403     /* Set values in coarse basis function and subdomain part of coarse_mat */
404     /* coarse basis functions */
405     j = i+n_vertices; /* don't touch this! */
406     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
407     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
409     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
410     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
411     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     if (pcbddc->switch_static || pcbddc->dbg_flag) {
413       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
414       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
415       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
416       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
417       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418     }
419     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
420     if (n_vertices) {
421       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
422       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
424     }
425     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
426     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
427     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
428 
429     if (pcbddc->dbg_flag) {
430       /* assemble subdomain vector on nodes */
431       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
432       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
433       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
434       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
436       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
437       /* assemble subdomain vector of lagrange multipliers */
438       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
439       if (n_vertices) {
440         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
441         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
442         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
443       }
444       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
445       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
446       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
450       /* check saddle point solution */
451       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
452       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
453       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
454       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
455       /* shift by the identity matrix */
456       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
457       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
458       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
459       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
460     }
461   }
462   /* call assembling routines for local coarse basis */
463   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
464   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
465   if (pcbddc->switch_static || pcbddc->dbg_flag) {
466     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468   }
469 
470   /* compute other basis functions for non-symmetric problems */
471   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
472   if (!setsym || (setsym && !issym)) {
473     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
474     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
475     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
476     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
477     if (pcbddc->switch_static || pcbddc->dbg_flag) {
478       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
479       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
480       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
481       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
482     }
483     for (i=0;i<pcbddc->local_primal_size;i++) {
484       if (n_constraints) {
485         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
486         for (j=0;j<n_constraints;j++) {
487           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
488         }
489         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
490         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
491       }
492       if (i<n_vertices) {
493         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
494         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
495         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
496         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
497         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
498         if (n_constraints) {
499           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
500         }
501       } else {
502         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
503       }
504       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
505       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
506       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
507       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
508       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
509       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
510       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       if (i<n_vertices) {
512         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
513       }
514       if (pcbddc->switch_static || pcbddc->dbg_flag) {
515         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
516         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
517         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
518         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
519         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520       }
521 
522       if (pcbddc->dbg_flag) {
523         /* assemble subdomain vector on nodes */
524         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
525         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
526         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
527         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         if (i<n_vertices) {
529           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
530         }
531         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
532         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
533         /* assemble subdomain vector of lagrange multipliers */
534         for (j=0;j<pcbddc->local_primal_size;j++) {
535           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
536         }
537         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
538         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
539         /* check saddle point solution */
540         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
541         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
542         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
543         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
544         /* shift by the identity matrix */
545         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
546         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
547         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
548         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
549       }
550     }
551     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553     if (pcbddc->switch_static || pcbddc->dbg_flag) {
554       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
556     }
557   }
558   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
559   /* Checking coarse_sub_mat and coarse basis functios */
560   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
561   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
562   if (pcbddc->dbg_flag) {
563     Mat         coarse_sub_mat;
564     Mat         AUXMAT,TM1,TM2,TM3,TM4;
565     Mat         coarse_phi_D,coarse_phi_B;
566     Mat         coarse_psi_D,coarse_psi_B;
567     Mat         A_II,A_BB,A_IB,A_BI;
568     MatType     checkmattype=MATSEQAIJ;
569     PetscReal   real_value;
570 
571     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
572     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
573     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
574     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
575     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
576     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
577     if (pcbddc->coarse_psi_B) {
578       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
579       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
580     }
581     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
582 
583     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
584     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
585     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
586     if (pcbddc->coarse_psi_B) {
587       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
588       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
589       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
590       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
591       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
592       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
593       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
594       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
595       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
596       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
597       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
598       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
599     } else {
600       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
601       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
602       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
603       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
604       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
605       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
606       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
607       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
608     }
609     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
610     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
611     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
612     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
613     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
614     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
615     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
616     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
617     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
618     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
619     for (i=0;i<pcbddc->local_primal_size;i++) {
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
621     }
622     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
623     for (i=0;i<pcbddc->local_primal_size;i++) {
624       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
625     }
626     if (pcbddc->coarse_psi_B) {
627       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
628       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
629         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
630       }
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
632       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
633         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
634       }
635     }
636     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
637     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
638     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
639     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
640     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
641     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
642     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
643     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
644     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
645     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
646     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
647     if (pcbddc->coarse_psi_B) {
648       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
649       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
650     }
651     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
652     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
653     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
654     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
655   }
656   /* free memory */
657   if (n_vertices) {
658     ierr = PetscFree(vertices);CHKERRQ(ierr);
659     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
660     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
661     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
662     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
663     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
664   }
665   if (n_constraints) {
666     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
667     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
668     ierr = MatDestroy(&M1);CHKERRQ(ierr);
669     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
670   }
671   ierr = PetscFree(auxindices);CHKERRQ(ierr);
672   /* get back data */
673   *coarse_submat_vals_n = coarse_submat_vals;
674   PetscFunctionReturn(0);
675 }
676 
677 #undef __FUNCT__
678 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
679 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
680 {
681   PC_IS*            pcis = (PC_IS*)(pc->data);
682   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
683   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
684   PetscBool         issbaij,isbaij;
685   /* manage repeated solves */
686   MatReuse          reuse;
687   MatStructure      matstruct;
688   PetscErrorCode    ierr;
689 
690   PetscFunctionBegin;
691   /* get mat flags */
692   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
693   reuse = MAT_INITIAL_MATRIX;
694   if (pc->setupcalled) {
695     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
696     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
697     if (matstruct == SAME_NONZERO_PATTERN) {
698       reuse = MAT_REUSE_MATRIX;
699     } else {
700       reuse = MAT_INITIAL_MATRIX;
701     }
702   }
703   if (reuse == MAT_INITIAL_MATRIX) {
704     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
705     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
706     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
707     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
708     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
709   }
710 
711   /* transform local matrices if needed */
712   if (pcbddc->use_change_of_basis) {
713     Mat         change_mat_all;
714     PetscScalar *row_cmat_values;
715     PetscInt    *row_cmat_indices;
716     PetscInt    *nnz,*is_indices,*temp_indices;
717     PetscInt    i,j,k,n_D,n_B;
718 
719     /* Get Non-overlapping dimensions */
720     n_B = pcis->n_B;
721     n_D = pcis->n-n_B;
722 
723     /* compute nonzero structure of change of basis on all local nodes */
724     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
725     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
726     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
727     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     k=1;
730     for (i=0;i<n_B;i++) {
731       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
732       nnz[is_indices[i]]=j;
733       if (k < j) k = j;
734       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
735     }
736     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     /* assemble change of basis matrix on the whole set of local dofs */
738     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
739     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
740     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
741     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
742     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
743     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
744     for (i=0;i<n_D;i++) {
745       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
746     }
747     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
749     for (i=0;i<n_B;i++) {
750       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
751       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
752       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
753       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
754     }
755     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
756     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
757     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
758     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
759     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
760     if (!issbaij && !isbaij) {
761       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
762     } else {
763       Mat work_mat;
764       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
765       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
766       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
767     }
768     /*
769     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
770     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
771     */
772     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
773     ierr = PetscFree(nnz);CHKERRQ(ierr);
774     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
775   } else {
776     /* without change of basis, the local matrix is unchanged */
777     if (!pcbddc->local_mat) {
778       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
779       pcbddc->local_mat = matis->A;
780     }
781   }
782 
783   /* get submatrices */
784   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
785   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
786   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
787   if (!issbaij) {
788     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
789     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
790   } else {
791     Mat newmat;
792     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
793     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
794     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
795     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
796   }
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
802 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
803 {
804   PC_IS*         pcis = (PC_IS*)(pc->data);
805   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
806   IS             is_aux1,is_aux2;
807   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
808   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
809   PetscInt       vbs,bs;
810   PetscBT        bitmask;
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   /* Set Non-overlapping dimensions */
815   n_B = pcis->n_B; n_D = pcis->n - n_B;
816   /* get vertex indices from constraint matrix */
817   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
818 
819   /* create auxiliary bitmask */
820   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
821   for (i=0;i<n_vertices;i++) {
822     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
823   }
824 
825   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
826   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
827   for (i=0, n_R=0; i<pcis->n; i++) {
828     if (!PetscBTLookup(bitmask,i)) {
829       idx_R_local[n_R] = i;
830       n_R++;
831     }
832   }
833 
834   /* Block code */
835   vbs = 1;
836   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
837   if (bs>1 && !(n_vertices%bs)) {
838     PetscBool is_blocked = PETSC_TRUE;
839     PetscInt  *vary;
840     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
841     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
842     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
843     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
844     for (i=0; i<n_vertices; i++) {
845       if (vary[i]!=0 && vary[i]!=bs) {
846         is_blocked = PETSC_FALSE;
847         break;
848       }
849     }
850     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
851       vbs = bs;
852       for (i=0;i<n_R/vbs;i++) {
853         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
854       }
855     }
856     ierr = PetscFree(vary);CHKERRQ(ierr);
857   }
858   ierr = PetscFree(vertices);CHKERRQ(ierr);
859   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
860   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
861 
862   /* print some info if requested */
863   if (pcbddc->dbg_flag) {
864     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
865     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
866     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
867     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
868     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
869     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
870     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
871   }
872 
873   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
874   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
875   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
876   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
877   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
878   for (i=0; i<n_D; i++) {
879     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
880   }
881   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
882   for (i=0, j=0; i<n_R; i++) {
883     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
884       aux_array1[j++] = i;
885     }
886   }
887   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
888   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
889   for (i=0, j=0; i<n_B; i++) {
890     if (!PetscBTLookup(bitmask,is_indices[i])) {
891       aux_array2[j++] = i;
892     }
893   }
894   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
895   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
896   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
897   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
898   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
899 
900   if (pcbddc->switch_static || pcbddc->dbg_flag) {
901     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
902     for (i=0, j=0; i<n_R; i++) {
903       if (PetscBTLookup(bitmask,idx_R_local[i])) {
904         aux_array1[j++] = i;
905       }
906     }
907     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
908     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
909     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
910   }
911   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
912   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
913   PetscFunctionReturn(0);
914 }
915 
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
919 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
920 {
921   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
922   PC_IS          *pcis = (PC_IS*)pc->data;
923   PC             pc_temp;
924   Mat            A_RR;
925   Vec            vec1,vec2,vec3;
926   MatStructure   matstruct;
927   PetscScalar    m_one = -1.0;
928   PetscReal      value;
929   PetscInt       n_D,n_R,ibs,mbs;
930   PetscBool      use_exact,use_exact_reduced,issbaij;
931   PetscErrorCode ierr;
932   /* prefixes stuff */
933   char           dir_prefix[256],neu_prefix[256],str_level[3];
934   size_t         len;
935 
936   PetscFunctionBegin;
937   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
938 
939   /* compute prefixes */
940   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
941   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
942   if (!pcbddc->current_level) {
943     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
944     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
945     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
946     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
947   } else {
948     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
949     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
950     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
951     len -= 15; /* remove "pc_bddc_coarse_" */
952     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
953     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
954     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
955     *(dir_prefix+len)='\0';
956     *(neu_prefix+len)='\0';
957     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
958     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
959     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
960     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
961   }
962 
963   /* DIRICHLET PROBLEM */
964   /* Matrix for Dirichlet problem is pcis->A_II */
965   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
966   if (!pcbddc->ksp_D) { /* create object if not yet build */
967     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
968     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
969     /* default */
970     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
971     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
972     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
973     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
974     if (issbaij) {
975       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
976     } else {
977       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
978     }
979     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
980   }
981   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
982   /* Allow user's customization */
983   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
984   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
985   if (!n_D) {
986     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
987     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
988   }
989   /* Set Up KSP for Dirichlet problem of BDDC */
990   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
991   /* set ksp_D into pcis data */
992   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
993   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
994   pcis->ksp_D = pcbddc->ksp_D;
995 
996   /* NEUMANN PROBLEM */
997   /* Matrix for Neumann problem is A_RR -> we need to create it */
998   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
999   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1000   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1001   if (ibs != mbs) {
1002     Mat newmat;
1003     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1004     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1005     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1006   } else {
1007     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1008   }
1009   if (!pcbddc->ksp_R) { /* create object if not yet build */
1010     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1011     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1012     /* default */
1013     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1014     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1015     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1016     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1017     if (issbaij) {
1018       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1019     } else {
1020       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1021     }
1022     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1023   }
1024   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1025   /* Allow user's customization */
1026   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1027   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1028   if (!n_R) {
1029     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1030     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1031   }
1032   /* Set Up KSP for Neumann problem of BDDC */
1033   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1034 
1035   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1036 
1037   /* Dirichlet */
1038   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1039   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1040   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1041   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1042   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1043   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1044   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1045   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1046   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1047   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1048   /* need to be adapted? */
1049   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1050   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1051   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1052   /* print info */
1053   if (pcbddc->dbg_flag) {
1054     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1055     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1056     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1057     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1058     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1059   }
1060   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1061     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1062   }
1063 
1064   /* Neumann */
1065   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1066   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1067   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1068   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1069   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1070   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1071   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1072   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1073   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1074   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1075   /* need to be adapted? */
1076   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1077   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1078   /* print info */
1079   if (pcbddc->dbg_flag) {
1080     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1081     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1082   }
1083   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1084     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1085   }
1086   /* free Neumann problem's matrix */
1087   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1093 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1094 {
1095   PetscErrorCode ierr;
1096   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1097 
1098   PetscFunctionBegin;
1099   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1100   if (pcbddc->local_auxmat1) {
1101     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1102     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1109 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1110 {
1111   PetscErrorCode ierr;
1112   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1113   PC_IS*            pcis = (PC_IS*)  (pc->data);
1114   const PetscScalar zero = 0.0;
1115 
1116   PetscFunctionBegin;
1117   /* Application of PHI^T (or PSI^T)  */
1118   if (pcbddc->coarse_psi_B) {
1119     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1120     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1121   } else {
1122     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1123     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1124   }
1125   /* Scatter data of coarse_rhs */
1126   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1127   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1128 
1129   /* Local solution on R nodes */
1130   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1131   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1132   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133   if (pcbddc->switch_static) {
1134     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1135     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136   }
1137   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1138   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1139   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   if (pcbddc->switch_static) {
1142     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   }
1145 
1146   /* Coarse solution */
1147   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1149     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1150   }
1151   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153 
1154   /* Sum contributions from two levels */
1155   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1156   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1162 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1163 {
1164   PetscErrorCode ierr;
1165   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1166 
1167   PetscFunctionBegin;
1168   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1174 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1175 {
1176   PetscErrorCode ierr;
1177   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1178 
1179   PetscFunctionBegin;
1180   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 /* uncomment for testing purposes */
1185 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1188 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1189 {
1190   PetscErrorCode    ierr;
1191   PC_IS*            pcis = (PC_IS*)(pc->data);
1192   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1193   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1194   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1195   MatType           impMatType=MATSEQAIJ;
1196   /* one and zero */
1197   PetscScalar       one=1.0,zero=0.0;
1198   /* space to store constraints and their local indices */
1199   PetscScalar       *temp_quadrature_constraint;
1200   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1201   /* iterators */
1202   PetscInt          i,j,k,total_counts,temp_start_ptr;
1203   /* stuff to store connected components stored in pcbddc->mat_graph */
1204   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1205   PetscInt          n_ISForFaces,n_ISForEdges;
1206   /* near null space stuff */
1207   MatNullSpace      nearnullsp;
1208   const Vec         *nearnullvecs;
1209   Vec               *localnearnullsp;
1210   PetscBool         nnsp_has_cnst;
1211   PetscInt          nnsp_size;
1212   PetscScalar       *array;
1213   /* BLAS integers */
1214   PetscBLASInt      lwork,lierr;
1215   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1216   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1217   /* LAPACK working arrays for SVD or POD */
1218   PetscBool         skip_lapack;
1219   PetscScalar       *work;
1220   PetscReal         *singular_vals;
1221 #if defined(PETSC_USE_COMPLEX)
1222   PetscReal         *rwork;
1223 #endif
1224 #if defined(PETSC_MISSING_LAPACK_GESVD)
1225   PetscBLASInt      Blas_one_2=1;
1226   PetscScalar       *temp_basis,*correlation_mat;
1227 #else
1228   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1229   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1230 #endif
1231   /* change of basis */
1232   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1233   PetscBool         boolforchange,qr_needed;
1234   PetscBT           touched,change_basis,qr_needed_idx;
1235   /* auxiliary stuff */
1236   PetscInt          *nnz,*is_indices,*local_to_B;
1237   /* some quantities */
1238   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1239   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1240 
1241 
1242   PetscFunctionBegin;
1243   /* Destroy Mat objects computed previously */
1244   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1245   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1246   /* Get index sets for faces, edges and vertices from graph */
1247   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1248     pcbddc->use_vertices = PETSC_TRUE;
1249   }
1250   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1251   /* HACK: provide functions to set change of basis */
1252   if (!ISForVertices && pcbddc->NullSpace) {
1253     pcbddc->use_change_of_basis = PETSC_TRUE;
1254     pcbddc->use_change_on_faces = PETSC_FALSE;
1255   }
1256   /* print some info */
1257   if (pcbddc->dbg_flag) {
1258     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1259     i = 0;
1260     if (ISForVertices) {
1261       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1266     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1267   }
1268   /* check if near null space is attached to global mat */
1269   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1270   if (nearnullsp) {
1271     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1272   } else { /* if near null space is not provided BDDC uses constants by default */
1273     nnsp_size = 0;
1274     nnsp_has_cnst = PETSC_TRUE;
1275   }
1276   /* get max number of constraints on a single cc */
1277   max_constraints = nnsp_size;
1278   if (nnsp_has_cnst) max_constraints++;
1279 
1280   /*
1281        Evaluate maximum storage size needed by the procedure
1282        - temp_indices will contain start index of each constraint stored as follows
1283        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1284        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1285        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1286                                                                                                                                                          */
1287   total_counts = n_ISForFaces+n_ISForEdges;
1288   total_counts *= max_constraints;
1289   n_vertices = 0;
1290   if (ISForVertices) {
1291     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1292   }
1293   total_counts += n_vertices;
1294   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1295   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1296   total_counts = 0;
1297   max_size_of_constraint = 0;
1298   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1299     if (i<n_ISForEdges) {
1300       used_IS = &ISForEdges[i];
1301     } else {
1302       used_IS = &ISForFaces[i-n_ISForEdges];
1303     }
1304     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1305     total_counts += j;
1306     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1307   }
1308   total_counts *= max_constraints;
1309   total_counts += n_vertices;
1310   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1311   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1312   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1313   /* local to boundary numbering */
1314   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1315   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1316   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1317   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1318   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1319   /* get local part of global near null space vectors */
1320   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1321   for (k=0;k<nnsp_size;k++) {
1322     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1323     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   }
1326 
1327   /* whether or not to skip lapack calls */
1328   skip_lapack = PETSC_TRUE;
1329   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1330 
1331   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1332   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1333     PetscScalar temp_work;
1334 #if defined(PETSC_MISSING_LAPACK_GESVD)
1335     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1336     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1337     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1338     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1339 #if defined(PETSC_USE_COMPLEX)
1340     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1341 #endif
1342     /* now we evaluate the optimal workspace using query with lwork=-1 */
1343     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1344     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1345     lwork = -1;
1346     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1347 #if !defined(PETSC_USE_COMPLEX)
1348     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1349 #else
1350     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1351 #endif
1352     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1353     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1354 #else /* on missing GESVD */
1355     /* SVD */
1356     PetscInt max_n,min_n;
1357     max_n = max_size_of_constraint;
1358     min_n = max_constraints;
1359     if (max_size_of_constraint < max_constraints) {
1360       min_n = max_size_of_constraint;
1361       max_n = max_constraints;
1362     }
1363     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1364 #if defined(PETSC_USE_COMPLEX)
1365     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1366 #endif
1367     /* now we evaluate the optimal workspace using query with lwork=-1 */
1368     lwork = -1;
1369     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1370     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1371     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1372     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1373 #if !defined(PETSC_USE_COMPLEX)
1374     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1375 #else
1376     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1377 #endif
1378     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1379     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1380 #endif /* on missing GESVD */
1381     /* Allocate optimal workspace */
1382     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1383     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1384   }
1385   /* Now we can loop on constraining sets */
1386   total_counts = 0;
1387   temp_indices[0] = 0;
1388   /* vertices */
1389   if (ISForVertices) {
1390     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1391     if (nnsp_has_cnst) { /* consider all vertices */
1392       for (i=0;i<n_vertices;i++) {
1393         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1394         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1395         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1396         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1397         total_counts++;
1398       }
1399     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1400       PetscBool used_vertex;
1401       for (i=0;i<n_vertices;i++) {
1402         used_vertex = PETSC_FALSE;
1403         k = 0;
1404         while (!used_vertex && k<nnsp_size) {
1405           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1406           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1407             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1408             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1409             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1410             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1411             total_counts++;
1412             used_vertex = PETSC_TRUE;
1413           }
1414           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1415           k++;
1416         }
1417       }
1418     }
1419     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1420     n_vertices = total_counts;
1421   }
1422 
1423   /* edges and faces */
1424   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1425     if (i<n_ISForEdges) {
1426       used_IS = &ISForEdges[i];
1427       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1428     } else {
1429       used_IS = &ISForFaces[i-n_ISForEdges];
1430       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1431     }
1432     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1433     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1434     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1435     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1436     /* change of basis should not be performed on local periodic nodes */
1437     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1438     if (nnsp_has_cnst) {
1439       PetscScalar quad_value;
1440       temp_constraints++;
1441       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1442       for (j=0;j<size_of_constraint;j++) {
1443         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1444         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1445         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1446       }
1447       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1448       total_counts++;
1449     }
1450     for (k=0;k<nnsp_size;k++) {
1451       PetscReal real_value;
1452       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1453       for (j=0;j<size_of_constraint;j++) {
1454         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1455         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1456         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1457       }
1458       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1459       /* check if array is null on the connected component */
1460       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1461       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1462       if (real_value > 0.0) { /* keep indices and values */
1463         temp_constraints++;
1464         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1465         total_counts++;
1466       }
1467     }
1468     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1469     valid_constraints = temp_constraints;
1470     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1471     if (!pcbddc->use_nnsp_true && temp_constraints) {
1472       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1473 
1474 #if defined(PETSC_MISSING_LAPACK_GESVD)
1475       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1476          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1477          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1478             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1479             from that computed using LAPACKgesvd
1480          -> This is due to a different computation of eigenvectors in LAPACKheev
1481          -> The quality of the POD-computed basis will be the same */
1482       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1483       /* Store upper triangular part of correlation matrix */
1484       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1485       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1486       for (j=0;j<temp_constraints;j++) {
1487         for (k=0;k<j+1;k++) {
1488           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1489         }
1490       }
1491       /* compute eigenvalues and eigenvectors of correlation matrix */
1492       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1493       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1494 #if !defined(PETSC_USE_COMPLEX)
1495       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1496 #else
1497       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1498 #endif
1499       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1500       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1501       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1502       j=0;
1503       while (j < temp_constraints && singular_vals[j] < tol) j++;
1504       total_counts=total_counts-j;
1505       valid_constraints = temp_constraints-j;
1506       /* scale and copy POD basis into used quadrature memory */
1507       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1508       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1509       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1510       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1511       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1512       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1513       if (j<temp_constraints) {
1514         PetscInt ii;
1515         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1516         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1517         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1518         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1519         for (k=0;k<temp_constraints-j;k++) {
1520           for (ii=0;ii<size_of_constraint;ii++) {
1521             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1522           }
1523         }
1524       }
1525 #else  /* on missing GESVD */
1526       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1527       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1528       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1529       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1530 #if !defined(PETSC_USE_COMPLEX)
1531       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1532 #else
1533       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1534 #endif
1535       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1536       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1537       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1538       k = temp_constraints;
1539       if (k > size_of_constraint) k = size_of_constraint;
1540       j = 0;
1541       while (j < k && singular_vals[k-j-1] < tol) j++;
1542       total_counts = total_counts-temp_constraints+k-j;
1543       valid_constraints = k-j;
1544 #endif /* on missing GESVD */
1545     }
1546     /* setting change_of_basis flag is safe now */
1547     if (boolforchange) {
1548       for (j=0;j<valid_constraints;j++) {
1549         PetscBTSet(change_basis,total_counts-j-1);
1550       }
1551     }
1552   }
1553   /* free index sets of faces, edges and vertices */
1554   for (i=0;i<n_ISForFaces;i++) {
1555     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1556   }
1557   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1558   for (i=0;i<n_ISForEdges;i++) {
1559     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1560   }
1561   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1562   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1563 
1564   /* free workspace */
1565   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1566     ierr = PetscFree(work);CHKERRQ(ierr);
1567 #if defined(PETSC_USE_COMPLEX)
1568     ierr = PetscFree(rwork);CHKERRQ(ierr);
1569 #endif
1570     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1571 #if defined(PETSC_MISSING_LAPACK_GESVD)
1572     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1573     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1574 #endif
1575   }
1576   for (k=0;k<nnsp_size;k++) {
1577     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1578   }
1579   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1580 
1581   /* set quantities in pcbddc data structure */
1582   /* n_vertices defines the number of subdomain corners in the primal space */
1583   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1584   pcbddc->local_primal_size = total_counts;
1585   pcbddc->n_vertices = n_vertices;
1586   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1587 
1588   /* Create constraint matrix */
1589   /* The constraint matrix is used to compute the l2g map of primal dofs */
1590   /* so we need to set it up properly either with or without change of basis */
1591   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1592   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1593   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1594   /* array to compute a local numbering of constraints : vertices first then constraints */
1595   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1596   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1597   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1598   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1599   /* auxiliary stuff for basis change */
1600   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1601   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1602 
1603   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1604   total_primal_vertices=0;
1605   for (i=0;i<pcbddc->local_primal_size;i++) {
1606     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1607     if (size_of_constraint == 1) {
1608       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1609       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1610       aux_primal_minloc[total_primal_vertices]=0;
1611       total_primal_vertices++;
1612     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1613       PetscInt min_loc,min_index;
1614       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1615       /* find first untouched local node */
1616       k = 0;
1617       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1618       min_index = global_indices[k];
1619       min_loc = k;
1620       /* search the minimum among global nodes already untouched on the cc */
1621       for (k=1;k<size_of_constraint;k++) {
1622         /* there can be more than one constraint on a single connected component */
1623         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1624           min_index = global_indices[k];
1625           min_loc = k;
1626         }
1627       }
1628       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1629       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1630       aux_primal_minloc[total_primal_vertices]=min_loc;
1631       total_primal_vertices++;
1632     }
1633   }
1634   /* determine if a QR strategy is needed for change of basis */
1635   qr_needed = PETSC_FALSE;
1636   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1637   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1638     if (PetscBTLookup(change_basis,i)) {
1639       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1640       j = 0;
1641       for (k=0;k<size_of_constraint;k++) {
1642         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1643           j++;
1644         }
1645       }
1646       /* found more than one primal dof on the cc */
1647       if (j > 1) {
1648         PetscBTSet(qr_needed_idx,i);
1649         qr_needed = PETSC_TRUE;
1650       }
1651     }
1652   }
1653   /* free workspace */
1654   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1655 
1656   /* permute indices in order to have a sorted set of vertices */
1657   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1658 
1659   /* nonzero structure of constraint matrix */
1660   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1661   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1662   j=total_primal_vertices;
1663   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1664     if (!PetscBTLookup(change_basis,i)) {
1665       nnz[j]=temp_indices[i+1]-temp_indices[i];
1666       j++;
1667     }
1668   }
1669   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1670   ierr = PetscFree(nnz);CHKERRQ(ierr);
1671   /* set values in constraint matrix */
1672   for (i=0;i<total_primal_vertices;i++) {
1673     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1674   }
1675   total_counts = total_primal_vertices;
1676   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1677     if (!PetscBTLookup(change_basis,i)) {
1678       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1679       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1680       total_counts++;
1681     }
1682   }
1683   /* assembling */
1684   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686   /*
1687   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1688   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1689   */
1690   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1691   if (pcbddc->use_change_of_basis) {
1692     /* dual and primal dofs on a single cc */
1693     PetscInt     dual_dofs,primal_dofs;
1694     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1695     PetscInt     primal_counter;
1696     /* working stuff for GEQRF */
1697     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1698     PetscBLASInt lqr_work;
1699     /* working stuff for UNGQR */
1700     PetscScalar  *gqr_work,lgqr_work_t;
1701     PetscBLASInt lgqr_work;
1702     /* working stuff for TRTRS */
1703     PetscScalar  *trs_rhs;
1704     PetscBLASInt Blas_NRHS;
1705     /* pointers for values insertion into change of basis matrix */
1706     PetscInt     *start_rows,*start_cols;
1707     PetscScalar  *start_vals;
1708     /* working stuff for values insertion */
1709     PetscBT      is_primal;
1710 
1711     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1712     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1713     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1714     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1715     /* work arrays */
1716     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1717     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1718     /* nonzeros per row */
1719     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1720       if (PetscBTLookup(change_basis,i)) {
1721         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1722         if (PetscBTLookup(qr_needed_idx,i)) {
1723           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1724         } else {
1725           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1726           /* get local primal index on the cc */
1727           j = 0;
1728           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1729           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1730         }
1731       }
1732     }
1733     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1734     ierr = PetscFree(nnz);CHKERRQ(ierr);
1735     /* Set initial identity in the matrix */
1736     for (i=0;i<pcis->n_B;i++) {
1737       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1738     }
1739 
1740     if (pcbddc->dbg_flag) {
1741       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1742       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1743     }
1744 
1745 
1746     /* Now we loop on the constraints which need a change of basis */
1747     /*
1748        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1749        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1750 
1751        Basic blocks of change of basis matrix T computed
1752 
1753           - Using the following block transformation if there is only a primal dof on the cc
1754             (in the example, primal dof is the last one of the edge in LOCAL ordering
1755              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1756             | 1        0   ...        0     1 |
1757             | 0        1   ...        0     1 |
1758             |              ...                |
1759             | 0        ...            1     1 |
1760             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1761 
1762           - via QR decomposition of constraints otherwise
1763     */
1764     if (qr_needed) {
1765       /* space to store Q */
1766       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1767       /* first we issue queries for optimal work */
1768       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1769       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1770       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1771       lqr_work = -1;
1772       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1773       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1774       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1775       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1776       lgqr_work = -1;
1777       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1778       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1779       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1780       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1781       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1782       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1783       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1784       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1785       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1786       /* array to store scaling factors for reflectors */
1787       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1788       /* array to store rhs and solution of triangular solver */
1789       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1790       /* allocating workspace for check */
1791       if (pcbddc->dbg_flag) {
1792         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1793       }
1794     }
1795     /* array to store whether a node is primal or not */
1796     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1797     for (i=0;i<total_primal_vertices;i++) {
1798       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1799     }
1800 
1801     /* loop on constraints and see whether or not they need a change of basis and compute it */
1802     /* -> using implicit ordering contained in temp_indices data */
1803     total_counts = pcbddc->n_vertices;
1804     primal_counter = total_counts;
1805     while (total_counts<pcbddc->local_primal_size) {
1806       primal_dofs = 1;
1807       if (PetscBTLookup(change_basis,total_counts)) {
1808         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1809         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1810           primal_dofs++;
1811         }
1812         /* get constraint info */
1813         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1814         dual_dofs = size_of_constraint-primal_dofs;
1815 
1816         if (pcbddc->dbg_flag) {
1817           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1818         }
1819 
1820         if (primal_dofs > 1) { /* QR */
1821 
1822           /* copy quadrature constraints for change of basis check */
1823           if (pcbddc->dbg_flag) {
1824             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1825           }
1826           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1827           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1828 
1829           /* compute QR decomposition of constraints */
1830           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1831           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1832           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1833           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1834           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1835           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1836           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1837 
1838           /* explictly compute R^-T */
1839           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1840           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1841           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1842           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1843           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1844           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1845           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1846           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1847           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1848           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1849 
1850           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1851           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1852           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1853           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1854           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1855           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1856           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1857           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1858           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1859 
1860           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1861              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1862              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1863           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1864           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1865           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1866           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1868           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1869           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1870           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1871           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1872           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1873 
1874           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1875           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1876           /* insert cols for primal dofs */
1877           for (j=0;j<primal_dofs;j++) {
1878             start_vals = &qr_basis[j*size_of_constraint];
1879             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1880             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1881           }
1882           /* insert cols for dual dofs */
1883           for (j=0,k=0;j<dual_dofs;k++) {
1884             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1885               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1886               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1887               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1888               j++;
1889             }
1890           }
1891 
1892           /* check change of basis */
1893           if (pcbddc->dbg_flag) {
1894             PetscInt   ii,jj;
1895             PetscBool valid_qr=PETSC_TRUE;
1896             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1897             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1898             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1899             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1900             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1901             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1902             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1903             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1904             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1905             for (jj=0;jj<size_of_constraint;jj++) {
1906               for (ii=0;ii<primal_dofs;ii++) {
1907                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1908                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1909               }
1910             }
1911             if (!valid_qr) {
1912               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1913               for (jj=0;jj<size_of_constraint;jj++) {
1914                 for (ii=0;ii<primal_dofs;ii++) {
1915                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1916                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1917                   }
1918                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1919                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1920                   }
1921                 }
1922               }
1923             } else {
1924               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1925             }
1926           }
1927         } else { /* simple transformation block */
1928           PetscInt row,col;
1929           PetscScalar val;
1930           for (j=0;j<size_of_constraint;j++) {
1931             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1932             if (!PetscBTLookup(is_primal,row)) {
1933               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1934               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1935               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1936             } else {
1937               for (k=0;k<size_of_constraint;k++) {
1938                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1939                 if (row != col) {
1940                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1941                 } else {
1942                   val = 1.0;
1943                 }
1944                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1945               }
1946             }
1947           }
1948         }
1949         /* increment primal counter */
1950         primal_counter += primal_dofs;
1951       } else {
1952         if (pcbddc->dbg_flag) {
1953           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1954         }
1955       }
1956       /* increment constraint counter total_counts */
1957       total_counts += primal_dofs;
1958     }
1959 
1960     /* free workspace */
1961     if (qr_needed) {
1962       if (pcbddc->dbg_flag) {
1963         ierr = PetscFree(work);CHKERRQ(ierr);
1964       }
1965       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1966       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1967       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1968       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1969       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1970     }
1971     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1972     /* assembling */
1973     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1974     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1975     /*
1976     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1977     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1978     */
1979   }
1980 
1981   /* flush dbg viewer */
1982   if (pcbddc->dbg_flag) {
1983     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1984   }
1985 
1986   /* free workspace */
1987   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1988   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1989   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1990   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1991   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1992   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1993   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1994   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1995   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1996   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2002 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2003 {
2004   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2005   PC_IS       *pcis = (PC_IS*)pc->data;
2006   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2007   PetscInt    bs,ierr,i,vertex_size;
2008   PetscViewer viewer=pcbddc->dbg_viewer;
2009 
2010   PetscFunctionBegin;
2011   /* Reset previously computed graph */
2012   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2013   /* Init local Graph struct */
2014   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2015 
2016   /* Check validity of the csr graph passed in by the user */
2017   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2018     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2019   }
2020 
2021   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2022   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2023     Mat mat_adj;
2024     const PetscInt *xadj,*adjncy;
2025     PetscBool flg_row=PETSC_TRUE;
2026 
2027     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2028     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2029     if (!flg_row) {
2030       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2031     }
2032     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2033     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2034     if (!flg_row) {
2035       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2036     }
2037     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2038   }
2039 
2040   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2041   vertex_size = 1;
2042   if (!pcbddc->user_provided_isfordofs) {
2043     if (!pcbddc->n_ISForDofs) {
2044       IS *custom_ISForDofs;
2045 
2046       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2047       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2048       for (i=0;i<bs;i++) {
2049         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2050       }
2051       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2052       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2053       /* remove my references to IS objects */
2054       for (i=0;i<bs;i++) {
2055         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2056       }
2057       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2058     }
2059   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2060     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2061   }
2062 
2063   /* Setup of Graph */
2064   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2065 
2066   /* Graph's connected components analysis */
2067   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2068 
2069   /* print some info to stdout */
2070   if (pcbddc->dbg_flag) {
2071     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2078 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2079 {
2080   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2081   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2082   PetscErrorCode ierr;
2083 
2084   PetscFunctionBegin;
2085   n = 0;
2086   vertices = 0;
2087   if (pcbddc->ConstraintMatrix) {
2088     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2089     for (i=0;i<local_primal_size;i++) {
2090       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2091       if (size_of_constraint == 1) n++;
2092       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2093     }
2094     if (vertices_idx) {
2095       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2096       n = 0;
2097       for (i=0;i<local_primal_size;i++) {
2098         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2099         if (size_of_constraint == 1) {
2100           vertices[n++]=row_cmat_indices[0];
2101         }
2102         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2103       }
2104     }
2105   }
2106   *n_vertices = n;
2107   if (vertices_idx) *vertices_idx = vertices;
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 #undef __FUNCT__
2112 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2113 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2114 {
2115   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2116   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2117   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2118   PetscBT        touched;
2119   PetscErrorCode ierr;
2120 
2121     /* This function assumes that the number of local constraints per connected component
2122        is not greater than the number of nodes defined for the connected component
2123        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2124   PetscFunctionBegin;
2125   n = 0;
2126   constraints_index = 0;
2127   if (pcbddc->ConstraintMatrix) {
2128     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2129     max_size_of_constraint = 0;
2130     for (i=0;i<local_primal_size;i++) {
2131       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2132       if (size_of_constraint > 1) {
2133         n++;
2134       }
2135       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2136       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2137     }
2138     if (constraints_idx) {
2139       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2140       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2141       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2142       n = 0;
2143       for (i=0;i<local_primal_size;i++) {
2144         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2145         if (size_of_constraint > 1) {
2146           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2147           /* find first untouched local node */
2148           j = 0;
2149           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2150           min_index = row_cmat_global_indices[j];
2151           min_loc = j;
2152           /* search the minimum among nodes not yet touched on the connected component
2153              since there can be more than one constraint on a single cc */
2154           for (j=1;j<size_of_constraint;j++) {
2155             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2156               min_index = row_cmat_global_indices[j];
2157               min_loc = j;
2158             }
2159           }
2160           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2161           constraints_index[n++] = row_cmat_indices[min_loc];
2162         }
2163         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2164       }
2165       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2166       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2167     }
2168   }
2169   *n_constraints = n;
2170   if (constraints_idx) *constraints_idx = constraints_index;
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 /* the next two functions has been adapted from pcis.c */
2175 #undef __FUNCT__
2176 #define __FUNCT__ "PCBDDCApplySchur"
2177 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2178 {
2179   PetscErrorCode ierr;
2180   PC_IS          *pcis = (PC_IS*)(pc->data);
2181 
2182   PetscFunctionBegin;
2183   if (!vec2_B) { vec2_B = v; }
2184   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2185   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2186   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2187   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2188   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2194 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2195 {
2196   PetscErrorCode ierr;
2197   PC_IS          *pcis = (PC_IS*)(pc->data);
2198 
2199   PetscFunctionBegin;
2200   if (!vec2_B) { vec2_B = v; }
2201   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2202   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2203   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2204   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2205   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "PCBDDCSubsetNumbering"
2211 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2212 {
2213   Vec            local_vec,global_vec;
2214   IS             seqis,paris;
2215   VecScatter     scatter_ctx;
2216   PetscScalar    *array;
2217   PetscInt       *temp_global_dofs;
2218   PetscScalar    globalsum;
2219   PetscInt       i,j,s;
2220   PetscInt       nlocals,first_index,old_index,max_local;
2221   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2222   PetscMPIInt    *dof_sizes,*dof_displs;
2223   PetscBool      first_found;
2224   PetscErrorCode ierr;
2225 
2226   PetscFunctionBegin;
2227   /* mpi buffers */
2228   MPI_Comm_size(comm,&size_prec_comm);
2229   MPI_Comm_rank(comm,&rank_prec_comm);
2230   j = ( !rank_prec_comm ? size_prec_comm : 0);
2231   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2232   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2233   /* get maximum size of subset */
2234   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2235   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2236   max_local = 0;
2237   if (n_local_dofs) {
2238     max_local = temp_global_dofs[0];
2239     for (i=1;i<n_local_dofs;i++) {
2240       if (max_local < temp_global_dofs[i] ) {
2241         max_local = temp_global_dofs[i];
2242       }
2243     }
2244   }
2245   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2246   max_global++;
2247   max_local = 0;
2248   if (n_local_dofs) {
2249     max_local = local_dofs[0];
2250     for (i=1;i<n_local_dofs;i++) {
2251       if (max_local < local_dofs[i] ) {
2252         max_local = local_dofs[i];
2253       }
2254     }
2255   }
2256   max_local++;
2257   /* allocate workspace */
2258   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2259   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2260   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2261   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2262   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2263   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2264   /* create scatter */
2265   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2266   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2267   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2268   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2269   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2270   /* init array */
2271   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2272   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2273   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2274   if (local_dofs_mult) {
2275     for (i=0;i<n_local_dofs;i++) {
2276       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2277     }
2278   } else {
2279     for (i=0;i<n_local_dofs;i++) {
2280       array[local_dofs[i]]=1.0;
2281     }
2282   }
2283   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2284   /* scatter into global vec and get total number of global dofs */
2285   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2286   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2287   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2288   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2289   /* Fill global_vec with cumulative function for global numbering */
2290   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2291   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2292   nlocals = 0;
2293   first_index = -1;
2294   first_found = PETSC_FALSE;
2295   for (i=0;i<s;i++) {
2296     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2297       first_found = PETSC_TRUE;
2298       first_index = i;
2299     }
2300     nlocals += (PetscInt)PetscRealPart(array[i]);
2301   }
2302   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2303   if (!rank_prec_comm) {
2304     dof_displs[0]=0;
2305     for (i=1;i<size_prec_comm;i++) {
2306       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2307     }
2308   }
2309   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2310   if (first_found) {
2311     array[first_index] += (PetscScalar)nlocals;
2312     old_index = first_index;
2313     for (i=first_index+1;i<s;i++) {
2314       if (PetscRealPart(array[i]) > 0.0) {
2315         array[i] += array[old_index];
2316         old_index = i;
2317       }
2318     }
2319   }
2320   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2321   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2322   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2323   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2324   /* get global ordering of local dofs */
2325   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2326   if (local_dofs_mult) {
2327     for (i=0;i<n_local_dofs;i++) {
2328       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2329     }
2330   } else {
2331     for (i=0;i<n_local_dofs;i++) {
2332       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2333     }
2334   }
2335   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2336   /* free workspace */
2337   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2338   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2339   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2340   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2341   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2342   /* return pointer to global ordering of local dofs */
2343   *global_numbering_subset = temp_global_dofs;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2349 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2350 {
2351   PetscInt       i,j;
2352   PetscScalar    *alphas;
2353   PetscErrorCode ierr;
2354 
2355   PetscFunctionBegin;
2356   /* this implements stabilized Gram-Schmidt */
2357   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2358   for (i=0;i<n;i++) {
2359     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2360     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2361     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2362   }
2363   ierr = PetscFree(alphas);CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatConvert_IS_AIJ"
2370 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2371 {
2372   Mat new_mat;
2373   Mat_IS *matis = (Mat_IS*)(mat->data);
2374   /* info on mat */
2375   PetscInt bs,rows,cols;
2376   PetscInt lrows,lcols;
2377   PetscInt local_rows,local_cols;
2378   PetscMPIInt nsubdomains;
2379   /* preallocation */
2380   Vec vec_dnz,vec_onz;
2381   PetscScalar *my_dnz,*my_onz,*array;
2382   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2383   PetscInt  index_row,index_col,owner;
2384   PetscInt  *local_indices,*global_indices;
2385   /* work */
2386   PetscInt i,j;
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   /* CHECKS */
2391   /* get info from mat */
2392   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2393   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2394   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2395   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2396 
2397   /* MAT_INITIAL_MATRIX */
2398   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2399   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2400   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2401   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2402   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2403   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2404 
2405   /*
2406     preallocation
2407   */
2408 
2409   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2410   /*
2411      Some vectors are needed to sum up properly on shared interface dofs.
2412      Note that preallocation is not exact, since it overestimates nonzeros
2413   */
2414 /*
2415   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2416   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2417   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2418   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2419 */
2420   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2421   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2422   /* All processes need to compute entire row ownership */
2423   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2424   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2425   for (i=0;i<nsubdomains;i++) {
2426     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2427       row_ownership[j]=i;
2428     }
2429   }
2430   /* map indices of local mat to global values */
2431   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2432   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2433   for (i=0;i<local_rows;i++) local_indices[i]=i;
2434   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2435   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2436 
2437   /*
2438      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2439      then, they will be summed up properly. This way, preallocation is always sufficient
2440   */
2441   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2442   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2443   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2444   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2445   for (i=0;i<local_rows;i++) {
2446     index_row = global_indices[i];
2447     for (j=i;j<local_rows;j++) {
2448       owner = row_ownership[index_row];
2449       index_col = global_indices[j];
2450       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2451         my_dnz[i] += 1.0;
2452       } else { /* offdiag block */
2453         my_onz[i] += 1.0;
2454       }
2455       /* same as before, interchanging rows and cols */
2456       if (i != j) {
2457         owner = row_ownership[index_col];
2458         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2459           my_dnz[j] += 1.0;
2460         } else {
2461           my_onz[j] += 1.0;
2462         }
2463       }
2464     }
2465   }
2466   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2467   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2468   if (local_rows) { /* multilevel guard */
2469     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2470     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2471   }
2472   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2473   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2474   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2475   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2476   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2477   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2478   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2479 
2480   /* set computed preallocation in dnz and onz */
2481   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2482   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2483   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2484   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2485   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2486   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2487   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2488   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2489 
2490   /* Resize preallocation if overestimated */
2491   for (i=0;i<lrows;i++) {
2492     dnz[i] = PetscMin(dnz[i],lcols);
2493     onz[i] = PetscMin(onz[i],cols-lcols);
2494   }
2495   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2496   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2497 
2498   /*
2499     Set values. Very Basic.
2500   */
2501   for (i=0;i<local_rows;i++) {
2502     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2503     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2504     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2505     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2506     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2507   }
2508   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2509   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2510   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2511 
2512   /* get back mat */
2513   *M = new_mat;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "MatISSubassemble_Private"
2519 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2520 {
2521   Mat             subdomain_adj;
2522   IS              new_ranks,ranks_send_to;
2523   MatPartitioning partitioner;
2524   Mat_IS          *matis;
2525   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2526   PetscInt        prank;
2527   PetscMPIInt     size,rank,color;
2528   PetscInt        *xadj,*adjncy,*oldranks;
2529   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2530   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2531   PetscErrorCode  ierr;
2532   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2533   PetscSubcomm    subcomm;
2534 
2535   PetscFunctionBegin;
2536   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2537   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2538   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2539 
2540   /* Get info on mapping */
2541   matis = (Mat_IS*)(mat->data);
2542   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2543   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2544 
2545   /* build local CSR graph of subdomains' connectivity */
2546   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2547   xadj[0] = 0;
2548   xadj[1] = PetscMax(n_neighs-1,0);
2549   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2550   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2551 
2552   if (threshold) {
2553     PetscInt* count,min_threshold;
2554     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2555     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2556     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2557       for (j=0;j<n_shared[i];j++) {
2558         count[shared[i][j]] += 1;
2559       }
2560     }
2561     /* adapt threshold since we dont want to lose significant connections */
2562     min_threshold = n_neighs;
2563     for (i=1;i<n_neighs;i++) {
2564       for (j=0;j<n_shared[i];j++) {
2565         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2566       }
2567     }
2568     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2569     threshold = PetscMax(min_threshold+1,threshold);
2570     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2571     xadj[1] = 0;
2572     for (i=1;i<n_neighs;i++) {
2573       for (j=0;j<n_shared[i];j++) {
2574         if (count[shared[i][j]] < threshold) {
2575           adjncy[xadj[1]] = neighs[i];
2576           adjncy_wgt[xadj[1]] = n_shared[i];
2577           xadj[1]++;
2578           break;
2579         }
2580       }
2581     }
2582     ierr = PetscFree(count);CHKERRQ(ierr);
2583   } else {
2584     if (xadj[1]) {
2585       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2586       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2587     }
2588   }
2589   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2590   if (use_square) {
2591     for (i=0;i<xadj[1];i++) {
2592       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2593     }
2594   }
2595   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2596 
2597   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2598 
2599   /*
2600     Restrict work on active processes only.
2601   */
2602   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2603   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2604   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2605   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2606   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2607   if (color) {
2608     ierr = PetscFree(xadj);CHKERRQ(ierr);
2609     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2610     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2611   } else {
2612     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2613     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2614     prank = rank;
2615     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2616     for (i=0;i<size;i++) {
2617       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2618     }
2619     for (i=0;i<xadj[1];i++) {
2620       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2621     }
2622     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2623     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2624     n_subdomains = (PetscInt)size;
2625     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2626 
2627     /* Partition */
2628     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2629     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2630     if (use_vwgt) {
2631       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2632       v_wgt[0] = local_size;
2633       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2634     }
2635     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2636     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2637     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2638     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2639     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2640 
2641     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2642     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2643     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2644     /* clean up */
2645     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2646     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2647     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2648     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2649   }
2650   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2651 
2652   /* assemble parallel IS for sends */
2653   i = 1;
2654   if (color) i=0;
2655   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2656   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2657 
2658   /* get back IS */
2659   *is_sends = ranks_send_to;
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2664 
2665 #undef __FUNCT__
2666 #define __FUNCT__ "MatISSubassemble"
2667 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2668 {
2669   Mat                    local_mat,new_mat;
2670   Mat_IS                 *matis;
2671   IS                     is_sends_internal;
2672   PetscInt               rows,cols;
2673   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2674   PetscBool              ismatis,isdense;
2675   ISLocalToGlobalMapping l2gmap;
2676   PetscInt*              l2gmap_indices;
2677   const PetscInt*        is_indices;
2678   MatType                new_local_type;
2679   MatTypePrivate         new_local_type_private;
2680   /* buffers */
2681   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2682   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2683   /* MPI */
2684   MPI_Comm               comm;
2685   PetscMPIInt            n_sends,n_recvs,commsize;
2686   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2687   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2688   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2689   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2690   PetscErrorCode         ierr;
2691 
2692   PetscFunctionBegin;
2693   /* checks */
2694   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2695   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2696   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2697   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2698   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2699   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2700   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2701   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2702   PetscValidLogicalCollectiveInt(mat,bs,0);
2703   /* prepare IS for sending if not provided */
2704   if (!is_sends) {
2705     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2706     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2707   } else {
2708     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2709     is_sends_internal = is_sends;
2710   }
2711 
2712   /* get pointer of MATIS data */
2713   matis = (Mat_IS*)mat->data;
2714 
2715   /* get comm */
2716   comm = PetscObjectComm((PetscObject)mat);
2717 
2718   /* compute number of sends */
2719   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2720   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2721 
2722   /* compute number of receives */
2723   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2724   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2725   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2726   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2727   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2728   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2729   ierr = PetscFree(iflags);CHKERRQ(ierr);
2730 
2731   /* prepare send/receive buffers */
2732   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2733   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2734   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2735   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2736 
2737   /* Get data from local mat */
2738   if (!isdense) {
2739     /* TODO: See below some guidelines on how to prepare the local buffers */
2740     /*
2741        send_buffer_vals should contain the raw values of the local matrix
2742        send_buffer_idxs should contain:
2743        - MatType_PRIVATE type
2744        - PetscInt        size_of_l2gmap
2745        - PetscInt        global_row_indices[size_of_l2gmap]
2746        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2747     */
2748   } else {
2749     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2750     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2751     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2752     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2753     send_buffer_idxs[1] = i;
2754     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2755     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2756     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2757     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2758     for (i=0;i<n_sends;i++) {
2759       ilengths_vals[is_indices[i]] = len*len;
2760       ilengths_idxs[is_indices[i]] = len+2;
2761     }
2762   }
2763   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2764   buf_size_idxs = 0;
2765   buf_size_vals = 0;
2766   for (i=0;i<n_recvs;i++) {
2767     buf_size_idxs += (PetscInt)olengths_idxs[i];
2768     buf_size_vals += (PetscInt)olengths_vals[i];
2769   }
2770   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2771   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2772 
2773   /* get new tags for clean communications */
2774   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2775   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2776 
2777   /* allocate for requests */
2778   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2779   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2780   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2781   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2782 
2783   /* communications */
2784   ptr_idxs = recv_buffer_idxs;
2785   ptr_vals = recv_buffer_vals;
2786   for (i=0;i<n_recvs;i++) {
2787     source_dest = onodes[i];
2788     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2789     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2790     ptr_idxs += olengths_idxs[i];
2791     ptr_vals += olengths_vals[i];
2792   }
2793   for (i=0;i<n_sends;i++) {
2794     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2795     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2796     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2797   }
2798   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2799   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2800 
2801   /* assemble new l2g map */
2802   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2803   ptr_idxs = recv_buffer_idxs;
2804   buf_size_idxs = 0;
2805   for (i=0;i<n_recvs;i++) {
2806     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2807     ptr_idxs += olengths_idxs[i];
2808   }
2809   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2810   ptr_idxs = recv_buffer_idxs;
2811   buf_size_idxs = 0;
2812   for (i=0;i<n_recvs;i++) {
2813     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2814     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2815     ptr_idxs += olengths_idxs[i];
2816   }
2817   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2818   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2819   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2820 
2821   /* infer new local matrix type from received local matrices type */
2822   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2823   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2824   new_local_type_private = MATAIJ_PRIVATE;
2825   new_local_type = MATSEQAIJ;
2826   if (n_recvs) {
2827     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2828     ptr_idxs = recv_buffer_idxs;
2829     for (i=0;i<n_recvs;i++) {
2830       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2831         new_local_type_private = MATAIJ_PRIVATE;
2832         break;
2833       }
2834       ptr_idxs += olengths_idxs[i];
2835     }
2836     switch (new_local_type_private) {
2837       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2838         new_local_type = MATSEQAIJ;
2839         bs = 1;
2840         break;
2841       case MATAIJ_PRIVATE:
2842         new_local_type = MATSEQAIJ;
2843         bs = 1;
2844         break;
2845       case MATBAIJ_PRIVATE:
2846         new_local_type = MATSEQBAIJ;
2847         break;
2848       case MATSBAIJ_PRIVATE:
2849         new_local_type = MATSEQSBAIJ;
2850         break;
2851       default:
2852         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2853         break;
2854     }
2855   }
2856 
2857   /* create MATIS object */
2858   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2859   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2860   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2861   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2862   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2863   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2864 
2865   /* set values */
2866   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2867   ptr_vals = recv_buffer_vals;
2868   ptr_idxs = recv_buffer_idxs;
2869   for (i=0;i<n_recvs;i++) {
2870     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2871       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2872       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2873       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2874       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2875       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2876     }
2877     ptr_idxs += olengths_idxs[i];
2878     ptr_vals += olengths_vals[i];
2879   }
2880   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2881   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2882   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2884 
2885   { /* check */
2886     Vec       lvec,rvec;
2887     PetscReal infty_error;
2888 
2889     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2890     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2891     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2892     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2893     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2894     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2895     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2896     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2897     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2898   }
2899 
2900   /* free workspace */
2901   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2902   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2903   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2904   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2905   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2906   if (isdense) {
2907     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2908     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2909   } else {
2910     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2911   }
2912   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2913   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2914   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2915   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2916   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2917   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2918   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2919   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2920   ierr = PetscFree(onodes);CHKERRQ(ierr);
2921   /* get back new mat */
2922   *mat_n = new_mat;
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 #undef __FUNCT__
2927 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2928 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2929 {
2930   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2931   PC_IS                  *pcis = (PC_IS*)pc->data;
2932   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2933   MatNullSpace           CoarseNullSpace=NULL;
2934   ISLocalToGlobalMapping coarse_islg;
2935   IS                     coarse_is;
2936   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2937   PetscInt               im_active=-1,active_procs=-1;
2938   PC                     pc_temp;
2939   PCType                 coarse_pc_type;
2940   KSPType                coarse_ksp_type;
2941   PetscBool              multilevel_requested,multilevel_allowed;
2942   PetscBool              setsym,issym,isbddc,isnn;
2943   MatStructure           matstruct;
2944   PetscErrorCode         ierr;
2945 
2946   PetscFunctionBegin;
2947   /* Assign global numbering to coarse dofs */
2948   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2949 
2950   /* infer some info from user */
2951   issym = PETSC_FALSE;
2952   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2953   multilevel_allowed = PETSC_FALSE;
2954   multilevel_requested = PETSC_FALSE;
2955   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2956   if (multilevel_requested) {
2957     /* count "active processes" */
2958     im_active = !!(pcis->n);
2959     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2960     if (active_procs/pcbddc->coarsening_ratio < 2) {
2961       multilevel_allowed = PETSC_FALSE;
2962     } else {
2963       multilevel_allowed = PETSC_TRUE;
2964     }
2965   }
2966 
2967   /* set defaults for coarse KSP and PC */
2968   if (multilevel_allowed) {
2969     if (issym) {
2970       coarse_ksp_type = KSPRICHARDSON;
2971     } else {
2972       coarse_ksp_type = KSPCHEBYSHEV;
2973     }
2974     coarse_pc_type = PCBDDC;
2975   } else {
2976     coarse_ksp_type = KSPPREONLY;
2977     coarse_pc_type = PCREDUNDANT;
2978   }
2979 
2980   /* create the coarse KSP object only once with defaults */
2981   if (!pcbddc->coarse_ksp) {
2982     char prefix[256],str_level[3];
2983     size_t len;
2984     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2985     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2986     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2987     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2988     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2989     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2990     /* prefix */
2991     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2992     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2993     if (!pcbddc->current_level) {
2994       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2995       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2996     } else {
2997       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2998       if (pcbddc->current_level>1) len -= 2;
2999       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3000       *(prefix+len)='\0';
3001       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3002       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3003     }
3004     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3005   }
3006   /* allow user customization */
3007   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3008   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3009   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3010 
3011   /* get some info after set from options */
3012   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3013   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3014   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3015   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3016     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3017     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3018     isbddc = PETSC_FALSE;
3019   }
3020 
3021   /* propagate BDDC info to the next level */
3022   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3023   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3024   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3025 
3026   /* creates temporary MATIS object for coarse matrix */
3027   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3028   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3029   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3030   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3031   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3032   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3034   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3035   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3036   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3037 
3038   /* assemble coarse matrix */
3039   if (isbddc || isnn) {
3040     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3041   } else {
3042     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3043   }
3044   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3045 
3046   /* create local to global scatters for coarse problem */
3047   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3048   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3049   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3050 
3051   /* propagate symmetry info to coarse matrix */
3052   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3053 
3054   /* Compute coarse null space (special handling by BDDC only) */
3055   if (pcbddc->NullSpace) {
3056     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3057     if (isbddc) {
3058       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3059     } else {
3060       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3061     }
3062   }
3063 
3064   /* set operators */
3065   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3066   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3067 
3068   /* additional KSP customization */
3069   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3070   if (max_it < 5) {
3071     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3072   }
3073   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3074 
3075 
3076   /* print some info if requested */
3077   if (pcbddc->dbg_flag) {
3078     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3079     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3080     if (!multilevel_allowed) {
3081       if (multilevel_requested) {
3082         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3083       } else if (pcbddc->max_levels) {
3084         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3085       }
3086     }
3087     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3088     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3089   }
3090 
3091   /* setup coarse ksp */
3092   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3093   if (pcbddc->dbg_flag) {
3094     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3095     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3096     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3097     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3098   }
3099 
3100   /* Check coarse problem if requested */
3101   if (pcbddc->dbg_flag) {
3102     KSP       check_ksp;
3103     KSPType   check_ksp_type;
3104     PC        check_pc;
3105     Vec       check_vec;
3106     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3107     PetscInt  its;
3108     PetscBool ispreonly,compute;
3109 
3110     /* Create ksp object suitable for estimation of extreme eigenvalues */
3111     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3112     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3113     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3114     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3115     if (ispreonly) {
3116       check_ksp_type = KSPPREONLY;
3117       compute = PETSC_FALSE;
3118     } else {
3119       if (issym) check_ksp_type = KSPCG;
3120       else check_ksp_type = KSPGMRES;
3121       compute = PETSC_TRUE;
3122     }
3123     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3124     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3125     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3126     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3127     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3128     /* create random vec */
3129     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3130     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3131     if (CoarseNullSpace) {
3132       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3133     }
3134     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3135     /* solve coarse problem */
3136     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3137     if (CoarseNullSpace) {
3138       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3139     }
3140     /* check coarse problem residual error */
3141     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3142     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3143     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3144     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3145     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3146     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3147     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3148     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3149     /* get eigenvalue estimation if preonly has not been requested */
3150     if (!ispreonly) {
3151       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3152       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3153       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3154     }
3155     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3156     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3157   }
3158   /* free memory */
3159   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3160   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 #undef __FUNCT__
3165 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3166 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3167 {
3168   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3169   PC_IS*         pcis = (PC_IS*)pc->data;
3170   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3171   PetscInt       i,j,coarse_size;
3172   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3173   PetscErrorCode ierr;
3174 
3175   PetscFunctionBegin;
3176   /* get indices in local ordering for vertices and constraints */
3177   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3178   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3179   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3180   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3181   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3182   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3183   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3184 
3185   /* Compute global number of coarse dofs */
3186   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3187 
3188   /* check numbering */
3189   if (pcbddc->dbg_flag) {
3190     PetscScalar coarsesum,*array;
3191 
3192     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3193     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3194     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3195     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3196     for (i=0;i<pcbddc->local_primal_size;i++) {
3197       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3198     }
3199     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3200     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3201     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3202     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3203     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3204     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3205     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3206     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3207     for (i=0;i<pcis->n;i++) {
3208       if (array[i] == 1.0) {
3209         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3210       }
3211     }
3212     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3213     for (i=0;i<pcis->n;i++) {
3214       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3215     }
3216     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3217     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3218     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3219     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3220     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3221     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3222     if (pcbddc->dbg_flag > 1) {
3223       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3224       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3225       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3226       for (i=0;i<pcbddc->local_primal_size;i++) {
3227         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3228       }
3229       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3230     }
3231     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3232   }
3233   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3234   /* get back data */
3235   *coarse_size_n = coarse_size;
3236   *local_primal_indices_n = local_primal_indices;
3237   PetscFunctionReturn(0);
3238 }
3239 
3240