xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision af732b373bef41710375f94ce584e122b21ff981)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "PCBDDCCreateWorkVectors"
110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
111 {
112   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
113   PC_IS          *pcis = (PC_IS*)pc->data;
114   VecType        impVecType;
115   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
120   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
121   local_primal_size = n_constraints+n_vertices;
122   n_R = pcis->n-n_vertices;
123   /* local work vectors */
124   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
125   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
126   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
127   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
128   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
129   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
130   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
131   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
132   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
133   if (n_constraints) {
134     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
135     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
136     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
144 {
145   PetscErrorCode         ierr;
146   /* pointers to pcis and pcbddc */
147   PC_IS*                 pcis = (PC_IS*)pc->data;
148   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
149   /* submatrices of local problem */
150   Mat                    A_RV,A_VR,A_VV;
151   /* working matrices */
152   Mat                    M1,M2,M3,C_CR;
153   /* working vectors */
154   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
155   /* additional working stuff */
156   IS                     is_aux;
157   ISLocalToGlobalMapping BtoNmap;
158   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
159   const PetscScalar      *array,*row_cmat_values;
160   const PetscInt         *row_cmat_indices,*idx_R_local;
161   PetscInt               *vertices,*idx_V_B,*auxindices;
162   PetscInt               n_vertices,n_constraints,size_of_constraint;
163   PetscInt               i,j,n_R,n_D,n_B;
164   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
165   /* Vector and matrix types */
166   VecType                impVecType;
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
184 
185   /* Allocating some extra storage just to be safe */
186   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
187   for (i=0;i<pcis->n;i++) auxindices[i]=i;
188 
189   /* vertices in boundary numbering */
190   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
191   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
192   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
193   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
194   if (i != n_vertices) {
195     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
196   }
197 
198   /* some work vectors on vertices and/or constraints */
199   if (n_vertices) {
200     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
201     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
202     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
203     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
204   }
205   if (n_constraints) {
206     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
208   }
209 
210   /* Precompute stuffs needed for preprocessing and application of BDDC*/
211   if (n_constraints) {
212     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
213     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
214     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
215     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
216 
217     /* Extract constraints on R nodes: C_{CR}  */
218     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
219     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
220     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
221 
222     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
223     for (i=0;i<n_constraints;i++) {
224       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
225       /* Get row of constraint matrix in R numbering */
226       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
227       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
228       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
229       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
230       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
231       /* Solve for row of constraint matrix in R numbering */
232       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
233       /* Set values in local_auxmat2 */
234       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
235       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
237     }
238     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
241 
242     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
243     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
244     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
245     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
246     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
247     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
248     ierr = MatSetUp(M1);CHKERRQ(ierr);
249     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
250     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
251     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
252     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
253     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
254     ierr = MatDestroy(&M2);CHKERRQ(ierr);
255     ierr = MatDestroy(&M3);CHKERRQ(ierr);
256     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
257     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
258   }
259 
260   /* Get submatrices from subdomain matrix */
261   if (n_vertices) {
262     PetscInt ibs,iibs,mbs;
263     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
264     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
265     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
266     ierr = ISGetBlockSize(is_aux,&iibs);CHKERRQ(ierr);
267     if (ibs != mbs || iibs != mbs) { /* TODO BAIJ */
268       Mat newmat;
269       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
270       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
271       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
272       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
273       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
274     } else {
275       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
277       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
278     }
279     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
280   }
281 
282   /* Matrix of coarse basis functions (local) */
283   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
284   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
285   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
286   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
287   if (pcbddc->switch_static || pcbddc->dbg_flag) {
288     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
289     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
290     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
291     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
292   }
293 
294   if (pcbddc->dbg_flag) {
295     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
296     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
297     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
298   }
299   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
300   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
301 
302   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
303 
304   /* vertices */
305   for (i=0;i<n_vertices;i++) {
306     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
307     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
308     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
309     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
310     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
311     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
312     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
313     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
314     if (n_constraints) {
315       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
316       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
317       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
318     }
319     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
320     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
321 
322     /* Set values in coarse basis function and subdomain part of coarse_mat */
323     /* coarse basis functions */
324     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
325     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
326     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
327     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
328     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
329     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
330     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
331     if (pcbddc->switch_static || pcbddc->dbg_flag) {
332       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
333       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
335       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
336       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
337     }
338     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
339     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
340     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
341     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
342     if (n_constraints) {
343       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
344       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
345       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
346     }
347 
348     /* check */
349     if (pcbddc->dbg_flag) {
350       /* assemble subdomain vector on local nodes */
351       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
352       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
353       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
354       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
355       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
356       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
357       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
358       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
359       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
363       if (n_constraints) {
364         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
365         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
366         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
367       }
368       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
369       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
370       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
371       /* check saddle point solution */
372       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
373       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
374       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
375       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
376       /* shift by the identity matrix */
377       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
378       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
380       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
381     }
382   }
383 
384   /* constraints */
385   for (i=0;i<n_constraints;i++) {
386     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
387     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
388     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
389     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
390     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
391     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
392     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
393     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
394     if (n_vertices) {
395       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
396     }
397     /* Set values in coarse basis function and subdomain part of coarse_mat */
398     /* coarse basis functions */
399     j = i+n_vertices; /* don't touch this! */
400     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
401     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
404     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
405     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
406     if (pcbddc->switch_static || pcbddc->dbg_flag) {
407       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
409       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
411       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
412     }
413     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
414     if (n_vertices) {
415       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
416       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
417       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
418     }
419     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
420     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
421     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
422 
423     if (pcbddc->dbg_flag) {
424       /* assemble subdomain vector on nodes */
425       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
426       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
427       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
428       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
429       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
430       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
431       /* assemble subdomain vector of lagrange multipliers */
432       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
433       if (n_vertices) {
434         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
435         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
437       }
438       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
439       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
440       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
441       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
443       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
444       /* check saddle point solution */
445       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
447       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
448       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
449       /* shift by the identity matrix */
450       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
451       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
453       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
454     }
455   }
456   /* call assembling routines for local coarse basis */
457   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459   if (pcbddc->switch_static || pcbddc->dbg_flag) {
460     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
461     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
462   }
463 
464   /* compute other basis functions for non-symmetric problems */
465   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
466   if (!setsym || (setsym && !issym)) {
467     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
468     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
470     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
471     if (pcbddc->switch_static || pcbddc->dbg_flag) {
472       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
473       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
474       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
475       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
476     }
477     for (i=0;i<pcbddc->local_primal_size;i++) {
478       if (n_constraints) {
479         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
480         for (j=0;j<n_constraints;j++) {
481           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
482         }
483         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
484         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
485       }
486       if (i<n_vertices) {
487         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
488         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
489         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
490         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
491         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
492         if (n_constraints) {
493           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
494         }
495       } else {
496         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
497       }
498       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
499       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
500       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
501       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
502       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
503       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
504       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
505       if (i<n_vertices) {
506         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
507       }
508       if (pcbddc->switch_static || pcbddc->dbg_flag) {
509         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
511         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
512         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
513         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
514       }
515 
516       if (pcbddc->dbg_flag) {
517         /* assemble subdomain vector on nodes */
518         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
520         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
522         if (i<n_vertices) {
523           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
524         }
525         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
526         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
527         /* assemble subdomain vector of lagrange multipliers */
528         for (j=0;j<pcbddc->local_primal_size;j++) {
529           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
530         }
531         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
532         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
533         /* check saddle point solution */
534         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
535         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
536         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
537         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
538         /* shift by the identity matrix */
539         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
540         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
541         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
542         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
543       }
544     }
545     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547     if (pcbddc->switch_static || pcbddc->dbg_flag) {
548       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     }
551   }
552   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
553   /* Checking coarse_sub_mat and coarse basis functios */
554   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
555   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
556   if (pcbddc->dbg_flag) {
557     Mat         coarse_sub_mat;
558     Mat         AUXMAT,TM1,TM2,TM3,TM4;
559     Mat         coarse_phi_D,coarse_phi_B;
560     Mat         coarse_psi_D,coarse_psi_B;
561     Mat         A_II,A_BB,A_IB,A_BI;
562     MatType     checkmattype=MATSEQAIJ;
563     PetscReal   real_value;
564 
565     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
566     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
567     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
568     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
569     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
570     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
571     if (pcbddc->coarse_psi_B) {
572       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
573       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
574     }
575     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
576 
577     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
578     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
579     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
580     if (pcbddc->coarse_psi_B) {
581       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
582       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
583       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
584       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
585       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
586       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
587       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
588       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
589       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
590       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
591       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
592       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
593     } else {
594       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
595       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
596       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
597       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
598       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
599       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
600       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
601       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
602     }
603     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
604     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
605     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
606     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
607     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
608     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
612     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
613     for (i=0;i<pcbddc->local_primal_size;i++) {
614       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
615     }
616     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
617     for (i=0;i<pcbddc->local_primal_size;i++) {
618       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
619     }
620     if (pcbddc->coarse_psi_B) {
621       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
622       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
623         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
624       }
625       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
626       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
627         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
628       }
629     }
630     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
631     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
632     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
633     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
634     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
635     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
636     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
637     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
638     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
639     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
640     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
641     if (pcbddc->coarse_psi_B) {
642       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
643       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
644     }
645     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
646     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
647     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
648     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
649   }
650   /* free memory */
651   if (n_vertices) {
652     ierr = PetscFree(vertices);CHKERRQ(ierr);
653     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
654     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
655     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
656     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
657     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
658   }
659   if (n_constraints) {
660     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
661     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
662     ierr = MatDestroy(&M1);CHKERRQ(ierr);
663     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
664   }
665   ierr = PetscFree(auxindices);CHKERRQ(ierr);
666   /* get back data */
667   *coarse_submat_vals_n = coarse_submat_vals;
668   PetscFunctionReturn(0);
669 }
670 
671 #undef __FUNCT__
672 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
673 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
674 {
675   PC_IS*            pcis = (PC_IS*)(pc->data);
676   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
677   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
678   /* manage repeated solves */
679   MatReuse          reuse;
680   MatStructure      matstruct;
681   PetscErrorCode    ierr;
682 
683   PetscFunctionBegin;
684   /* get mat flags */
685   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
686   reuse = MAT_INITIAL_MATRIX;
687   if (pc->setupcalled) {
688     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
689     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
690     if (matstruct == SAME_NONZERO_PATTERN) {
691       reuse = MAT_REUSE_MATRIX;
692     } else {
693       reuse = MAT_INITIAL_MATRIX;
694     }
695   }
696   if (reuse == MAT_INITIAL_MATRIX) {
697     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
698     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
699     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
700     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
701     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
702   }
703 
704   /* transform local matrices if needed */
705   if (pcbddc->use_change_of_basis) {
706     Mat         change_mat_all;
707     PetscScalar *row_cmat_values;
708     PetscInt    *row_cmat_indices;
709     PetscInt    *nnz,*is_indices,*temp_indices;
710     PetscInt    i,j,k,n_D,n_B;
711 
712     /* Get Non-overlapping dimensions */
713     n_B = pcis->n_B;
714     n_D = pcis->n-n_B;
715 
716     /* compute nonzero structure of change of basis on all local nodes */
717     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
718     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
719     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
720     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
721     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
722     k=1;
723     for (i=0;i<n_B;i++) {
724       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
725       nnz[is_indices[i]]=j;
726       if (k < j) k = j;
727       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
728     }
729     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
730     /* assemble change of basis matrix on the whole set of local dofs */
731     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
732     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
733     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
734     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
735     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
736     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     for (i=0;i<n_D;i++) {
738       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
739     }
740     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
742     for (i=0;i<n_B;i++) {
743       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
744       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
745       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
746       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
747     }
748     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
749     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
750     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
751     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
752     if (i==1) {
753       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
754     } else {
755       Mat work_mat;
756       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
757       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
758       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
759     }
760     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
761     ierr = PetscFree(nnz);CHKERRQ(ierr);
762     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
763   } else {
764     /* without change of basis, the local matrix is unchanged */
765     if (!pcbddc->local_mat) {
766       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
767       pcbddc->local_mat = matis->A;
768     }
769   }
770 
771   /* get submatrices */
772   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
773   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
774   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
775   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
781 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
782 {
783   PC_IS*         pcis = (PC_IS*)(pc->data);
784   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
785   IS             is_aux1,is_aux2;
786   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
787   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
788   PetscBT        bitmask;
789   PetscErrorCode ierr;
790 
791   PetscFunctionBegin;
792   /* Set Non-overlapping dimensions */
793   n_B = pcis->n_B; n_D = pcis->n - n_B;
794   /* get vertex indices from constraint matrix */
795   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
796   /* Set number of constraints */
797   n_constraints = pcbddc->local_primal_size-n_vertices;
798   /* create auxiliary bitmask */
799   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
800   for (i=0;i<n_vertices;i++) {
801     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
802   }
803   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
804   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
805   for (i=0, n_R=0; i<pcis->n; i++) {
806     if (!PetscBTLookup(bitmask,i)) {
807       idx_R_local[n_R] = i;
808       n_R++;
809     }
810   }
811   ierr = PetscFree(vertices);CHKERRQ(ierr);
812   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
813 
814   /* print some info if requested */
815   if (pcbddc->dbg_flag) {
816     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
817     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
818     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
820     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
821     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
822     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
823   }
824 
825   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
826   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
827   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
828   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
829   for (i=0; i<n_D; i++) {
830     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
831   }
832   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   for (i=0, j=0; i<n_R; i++) {
834     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
835       aux_array1[j++] = i;
836     }
837   }
838   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
839   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
840   for (i=0, j=0; i<n_B; i++) {
841     if (!PetscBTLookup(bitmask,is_indices[i])) {
842       aux_array2[j++] = i;
843     }
844   }
845   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
846   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
847   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
848   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
849   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
850 
851   if (pcbddc->switch_static || pcbddc->dbg_flag) {
852     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
853     for (i=0, j=0; i<n_R; i++) {
854       if (PetscBTLookup(bitmask,idx_R_local[i])) {
855         aux_array1[j++] = i;
856       }
857     }
858     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
859     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
860     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
861   }
862   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
869 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
870 {
871   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
872   PC_IS          *pcis = (PC_IS*)pc->data;
873   PC             pc_temp;
874   Mat            A_RR;
875   Vec            vec1,vec2,vec3;
876   MatStructure   matstruct;
877   PetscScalar    m_one = -1.0;
878   PetscReal      value;
879   PetscInt       n_D,n_R,ibs,mbs;
880   PetscBool      use_exact,use_exact_reduced;
881   PetscErrorCode ierr;
882   /* prefixes stuff */
883   char           dir_prefix[256],neu_prefix[256],str_level[3];
884   size_t         len;
885 
886   PetscFunctionBegin;
887   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
888 
889   /* compute prefixes */
890   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
891   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
892   if (!pcbddc->current_level) {
893     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
894     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
895     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
896     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
897   } else {
898     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
899     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
900     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
901     len -= 15; /* remove "pc_bddc_coarse_" */
902     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
903     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
904     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
905     *(dir_prefix+len)='\0';
906     *(neu_prefix+len)='\0';
907     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
908     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
909     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
910     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
911   }
912 
913   /* DIRICHLET PROBLEM */
914   /* Matrix for Dirichlet problem is pcis->A_II */
915   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
916   if (!pcbddc->ksp_D) { /* create object if not yet build */
917     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
918     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
919     /* default */
920     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
921     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
922     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
923     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
924     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
925   }
926   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
927   /* Allow user's customization */
928   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
929   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
930   if (!n_D) {
931     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
932     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
933   }
934   /* Set Up KSP for Dirichlet problem of BDDC */
935   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
936   /* set ksp_D into pcis data */
937   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
938   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
939   pcis->ksp_D = pcbddc->ksp_D;
940 
941   /* NEUMANN PROBLEM */
942   /* Matrix for Neumann problem is A_RR -> we need to create it */
943   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
944   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
945   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
946   if (ibs != mbs) { /* TODO BAIJ: see if this can be avoided */
947     Mat newmat;
948     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
949     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
950     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
951   } else {
952     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
953   }
954   if (!pcbddc->ksp_R) { /* create object if not yet build */
955     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
956     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
957     /* default */
958     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
959     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
960     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
961     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
962     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
963   }
964   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
965   /* Allow user's customization */
966   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
967   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
968   if (!n_R) {
969     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
970     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
971   }
972   /* Set Up KSP for Neumann problem of BDDC */
973   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
974 
975   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
976 
977   /* Dirichlet */
978   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
979   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
980   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
981   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
982   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
983   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
984   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
985   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
986   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
987   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
988   /* need to be adapted? */
989   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
990   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
991   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
992   /* print info */
993   if (pcbddc->dbg_flag) {
994     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
995     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
996     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
999   }
1000   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1001     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1002   }
1003 
1004   /* Neumann */
1005   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1006   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1007   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1008   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1009   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1010   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1011   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1012   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1013   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1014   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1015   /* need to be adapted? */
1016   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1017   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1018   /* print info */
1019   if (pcbddc->dbg_flag) {
1020     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1021     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1022   }
1023   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1024     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1025   }
1026   /* free Neumann problem's matrix */
1027   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1033 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1034 {
1035   PetscErrorCode ierr;
1036   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1037 
1038   PetscFunctionBegin;
1039   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1040   if (pcbddc->local_auxmat1) {
1041     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1042     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1043   }
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1049 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1050 {
1051   PetscErrorCode ierr;
1052   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1053   PC_IS*            pcis = (PC_IS*)  (pc->data);
1054   const PetscScalar zero = 0.0;
1055 
1056   PetscFunctionBegin;
1057   /* Application of PHI^T (or PSI^T)  */
1058   if (pcbddc->coarse_psi_B) {
1059     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1060     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1061   } else {
1062     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1063     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1064   }
1065   /* Scatter data of coarse_rhs */
1066   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1067   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1068 
1069   /* Local solution on R nodes */
1070   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1071   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1073   if (pcbddc->switch_static) {
1074     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   }
1077   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1078   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1079   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1080   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   if (pcbddc->switch_static) {
1082     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084   }
1085 
1086   /* Coarse solution */
1087   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1089     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1090   }
1091   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1093 
1094   /* Sum contributions from two levels */
1095   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1096   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1102 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1103 {
1104   PetscErrorCode ierr;
1105   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1106 
1107   PetscFunctionBegin;
1108   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1114 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1115 {
1116   PetscErrorCode ierr;
1117   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1118 
1119   PetscFunctionBegin;
1120   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 /* uncomment for testing purposes */
1125 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1126 #undef __FUNCT__
1127 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1128 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1129 {
1130   PetscErrorCode    ierr;
1131   PC_IS*            pcis = (PC_IS*)(pc->data);
1132   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1133   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1134   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1135   MatType           impMatType=MATSEQAIJ;
1136   /* one and zero */
1137   PetscScalar       one=1.0,zero=0.0;
1138   /* space to store constraints and their local indices */
1139   PetscScalar       *temp_quadrature_constraint;
1140   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1141   /* iterators */
1142   PetscInt          i,j,k,total_counts,temp_start_ptr;
1143   /* stuff to store connected components stored in pcbddc->mat_graph */
1144   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1145   PetscInt          n_ISForFaces,n_ISForEdges;
1146   /* near null space stuff */
1147   MatNullSpace      nearnullsp;
1148   const Vec         *nearnullvecs;
1149   Vec               *localnearnullsp;
1150   PetscBool         nnsp_has_cnst;
1151   PetscInt          nnsp_size;
1152   PetscScalar       *array;
1153   /* BLAS integers */
1154   PetscBLASInt      lwork,lierr;
1155   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1156   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1157   /* LAPACK working arrays for SVD or POD */
1158   PetscBool         skip_lapack;
1159   PetscScalar       *work;
1160   PetscReal         *singular_vals;
1161 #if defined(PETSC_USE_COMPLEX)
1162   PetscReal         *rwork;
1163 #endif
1164 #if defined(PETSC_MISSING_LAPACK_GESVD)
1165   PetscBLASInt      Blas_one_2=1;
1166   PetscScalar       *temp_basis,*correlation_mat;
1167 #else
1168   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1169   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1170 #endif
1171   /* change of basis */
1172   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1173   PetscBool         boolforchange;
1174   PetscBT           touched,change_basis;
1175   /* auxiliary stuff */
1176   PetscInt          *nnz,*is_indices,*local_to_B;
1177   /* some quantities */
1178   PetscInt          n_vertices,total_primal_vertices;
1179   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1180 
1181 
1182   PetscFunctionBegin;
1183   /* Get index sets for faces, edges and vertices from graph */
1184   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1185     pcbddc->use_vertices = PETSC_TRUE;
1186   }
1187   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1188   /* HACK: provide functions to set change of basis */
1189   if (!ISForVertices && pcbddc->NullSpace) {
1190     pcbddc->use_change_of_basis = PETSC_TRUE;
1191     pcbddc->use_change_on_faces = PETSC_FALSE;
1192   }
1193   /* print some info */
1194   if (pcbddc->dbg_flag) {
1195     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1196     i = 0;
1197     if (ISForVertices) {
1198       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1199     }
1200     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1201     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1202     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1203     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1204   }
1205   /* check if near null space is attached to global mat */
1206   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1207   if (nearnullsp) {
1208     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1209   } else { /* if near null space is not provided BDDC uses constants by default */
1210     nnsp_size = 0;
1211     nnsp_has_cnst = PETSC_TRUE;
1212   }
1213   /* get max number of constraints on a single cc */
1214   max_constraints = nnsp_size;
1215   if (nnsp_has_cnst) max_constraints++;
1216 
1217   /*
1218        Evaluate maximum storage size needed by the procedure
1219        - temp_indices will contain start index of each constraint stored as follows
1220        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1221        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1222        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1223                                                                                                                                                          */
1224   total_counts = n_ISForFaces+n_ISForEdges;
1225   total_counts *= max_constraints;
1226   n_vertices = 0;
1227   if (ISForVertices) {
1228     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1229   }
1230   total_counts += n_vertices;
1231   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1232   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1233   total_counts = 0;
1234   max_size_of_constraint = 0;
1235   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1236     if (i<n_ISForEdges) {
1237       used_IS = &ISForEdges[i];
1238     } else {
1239       used_IS = &ISForFaces[i-n_ISForEdges];
1240     }
1241     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1242     total_counts += j;
1243     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1244   }
1245   total_counts *= max_constraints;
1246   total_counts += n_vertices;
1247   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1248   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1249   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1250   /* local to boundary numbering */
1251   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1252   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1253   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1254   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1255   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1256   /* get local part of global near null space vectors */
1257   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1258   for (k=0;k<nnsp_size;k++) {
1259     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1260     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262   }
1263 
1264   /* whether or not to skip lapack calls */
1265   skip_lapack = PETSC_TRUE;
1266   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1267 
1268   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1269   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1270     PetscScalar temp_work;
1271 #if defined(PETSC_MISSING_LAPACK_GESVD)
1272     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1273     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1274     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1275     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1276 #if defined(PETSC_USE_COMPLEX)
1277     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1278 #endif
1279     /* now we evaluate the optimal workspace using query with lwork=-1 */
1280     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1281     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1282     lwork = -1;
1283     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1284 #if !defined(PETSC_USE_COMPLEX)
1285     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1286 #else
1287     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1288 #endif
1289     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1290     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1291 #else /* on missing GESVD */
1292     /* SVD */
1293     PetscInt max_n,min_n;
1294     max_n = max_size_of_constraint;
1295     min_n = max_constraints;
1296     if (max_size_of_constraint < max_constraints) {
1297       min_n = max_size_of_constraint;
1298       max_n = max_constraints;
1299     }
1300     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1301 #if defined(PETSC_USE_COMPLEX)
1302     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1303 #endif
1304     /* now we evaluate the optimal workspace using query with lwork=-1 */
1305     lwork = -1;
1306     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1307     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1308     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1309     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1310 #if !defined(PETSC_USE_COMPLEX)
1311     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1312 #else
1313     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1314 #endif
1315     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1316     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1317 #endif /* on missing GESVD */
1318     /* Allocate optimal workspace */
1319     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1320     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1321   }
1322   /* Now we can loop on constraining sets */
1323   total_counts = 0;
1324   temp_indices[0] = 0;
1325   /* vertices */
1326   if (ISForVertices) {
1327     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1328     if (nnsp_has_cnst) { /* consider all vertices */
1329       for (i=0;i<n_vertices;i++) {
1330         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1331         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1332         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1333         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1334         total_counts++;
1335       }
1336     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1337       PetscBool used_vertex;
1338       for (i=0;i<n_vertices;i++) {
1339         used_vertex = PETSC_FALSE;
1340         k = 0;
1341         while (!used_vertex && k<nnsp_size) {
1342           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1343           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1344             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1345             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1346             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1347             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1348             total_counts++;
1349             used_vertex = PETSC_TRUE;
1350           }
1351           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1352           k++;
1353         }
1354       }
1355     }
1356     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1357     n_vertices = total_counts;
1358   }
1359 
1360   /* edges and faces */
1361   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1362     if (i<n_ISForEdges) {
1363       used_IS = &ISForEdges[i];
1364       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1365     } else {
1366       used_IS = &ISForFaces[i-n_ISForEdges];
1367       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1368     }
1369     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1370     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1371     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1372     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1373     /* change of basis should not be performed on local periodic nodes */
1374     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1375     if (nnsp_has_cnst) {
1376       PetscScalar quad_value;
1377       temp_constraints++;
1378       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1379       for (j=0;j<size_of_constraint;j++) {
1380         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1381         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1382         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1383       }
1384       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1385       if (boolforchange) {
1386         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1387       }
1388       total_counts++;
1389     }
1390     for (k=0;k<nnsp_size;k++) {
1391       PetscReal real_value;
1392       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1393       for (j=0;j<size_of_constraint;j++) {
1394         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1395         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1396         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1397       }
1398       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1399       /* check if array is null on the connected component */
1400       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1401       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1402       if (real_value > 0.0) { /* keep indices and values */
1403         temp_constraints++;
1404         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1405         if (boolforchange) {
1406           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1407         }
1408         total_counts++;
1409       }
1410     }
1411     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1412     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1413     if (!pcbddc->use_nnsp_true) {
1414       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1415 
1416 #if defined(PETSC_MISSING_LAPACK_GESVD)
1417       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1418          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1419          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1420             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1421             from that computed using LAPACKgesvd
1422          -> This is due to a different computation of eigenvectors in LAPACKheev
1423          -> The quality of the POD-computed basis will be the same */
1424       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1425       /* Store upper triangular part of correlation matrix */
1426       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1427       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1428       for (j=0;j<temp_constraints;j++) {
1429         for (k=0;k<j+1;k++) {
1430           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1431         }
1432       }
1433       /* compute eigenvalues and eigenvectors of correlation matrix */
1434       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1435       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1436 #if !defined(PETSC_USE_COMPLEX)
1437       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1438 #else
1439       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1440 #endif
1441       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1442       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1443       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1444       j=0;
1445       while (j < temp_constraints && singular_vals[j] < tol) j++;
1446       total_counts=total_counts-j;
1447       /* scale and copy POD basis into used quadrature memory */
1448       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1449       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1450       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1451       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1452       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1453       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1454       if (j<temp_constraints) {
1455         PetscInt ii;
1456         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1457         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1458         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1459         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1460         for (k=0;k<temp_constraints-j;k++) {
1461           for (ii=0;ii<size_of_constraint;ii++) {
1462             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1463           }
1464         }
1465       }
1466 #else  /* on missing GESVD */
1467       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1468       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1469       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1470       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1471 #if !defined(PETSC_USE_COMPLEX)
1472       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1473 #else
1474       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1475 #endif
1476       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1477       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1478       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1479       k = temp_constraints;
1480       if (k > size_of_constraint) k = size_of_constraint;
1481       j = 0;
1482       while (j < k && singular_vals[k-j-1] < tol) j++;
1483       total_counts = total_counts-temp_constraints+k-j;
1484 #endif /* on missing GESVD */
1485     }
1486   }
1487   /* free index sets of faces, edges and vertices */
1488   for (i=0;i<n_ISForFaces;i++) {
1489     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1490   }
1491   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1492   for (i=0;i<n_ISForEdges;i++) {
1493     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1494   }
1495   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1496   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1497 
1498   /* free workspace */
1499   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1500     ierr = PetscFree(work);CHKERRQ(ierr);
1501 #if defined(PETSC_USE_COMPLEX)
1502     ierr = PetscFree(rwork);CHKERRQ(ierr);
1503 #endif
1504     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1505 #if defined(PETSC_MISSING_LAPACK_GESVD)
1506     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1507     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1508 #endif
1509   }
1510   for (k=0;k<nnsp_size;k++) {
1511     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1512   }
1513   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1514 
1515   /* set quantities in pcbddc data structure */
1516   /* n_vertices defines the number of subdomain corners in the primal space */
1517   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1518   pcbddc->local_primal_size = total_counts;
1519   pcbddc->n_vertices = n_vertices;
1520   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1521 
1522   /* Create constraint matrix */
1523   /* The constraint matrix is used to compute the l2g map of primal dofs */
1524   /* so we need to set it up properly either with or without change of basis */
1525   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1526   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1527   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1528   /* array to compute a local numbering of constraints : vertices first then constraints */
1529   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1530   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1531   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1532   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1533   /* auxiliary stuff for basis change */
1534   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1535   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1536 
1537   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1538   total_primal_vertices=0;
1539   for (i=0;i<pcbddc->local_primal_size;i++) {
1540     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1541     if (size_of_constraint == 1) {
1542       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1543       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1544       aux_primal_minloc[total_primal_vertices]=0;
1545       total_primal_vertices++;
1546     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1547       PetscInt min_loc,min_index;
1548       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1549       /* find first untouched local node */
1550       k = 0;
1551       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1552       min_index = global_indices[k];
1553       min_loc = k;
1554       /* search the minimum among global nodes already untouched on the cc */
1555       for (k=1;k<size_of_constraint;k++) {
1556         /* there can be more than one constraint on a single connected component */
1557         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1558           min_index = global_indices[k];
1559           min_loc = k;
1560         }
1561       }
1562       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1563       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1564       aux_primal_minloc[total_primal_vertices]=min_loc;
1565       total_primal_vertices++;
1566     }
1567   }
1568   /* free workspace */
1569   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1570   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1571   /* permute indices in order to have a sorted set of vertices */
1572   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1573 
1574   /* nonzero structure of constraint matrix */
1575   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1576   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1577   j=total_primal_vertices;
1578   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1579     if (!PetscBTLookup(change_basis,i)) {
1580       nnz[j]=temp_indices[i+1]-temp_indices[i];
1581       j++;
1582     }
1583   }
1584   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1585   ierr = PetscFree(nnz);CHKERRQ(ierr);
1586   /* set values in constraint matrix */
1587   for (i=0;i<total_primal_vertices;i++) {
1588     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1589   }
1590   total_counts = total_primal_vertices;
1591   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1592     if (!PetscBTLookup(change_basis,i)) {
1593       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1594       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1595       total_counts++;
1596     }
1597   }
1598   /* assembling */
1599   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1600   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1601   /*
1602   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1603   */
1604   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1605   if (pcbddc->use_change_of_basis) {
1606     PetscBool qr_needed = PETSC_FALSE;
1607     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1608     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1609     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1610     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1611     /* work arrays */
1612     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1613     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1614     /* nonzeros per row */
1615     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1616       if (PetscBTLookup(change_basis,i)) {
1617         qr_needed = PETSC_TRUE;
1618         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1619         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1620       }
1621     }
1622     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1623     ierr = PetscFree(nnz);CHKERRQ(ierr);
1624     /* Set initial identity in the matrix */
1625     for (i=0;i<pcis->n_B;i++) {
1626       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1627     }
1628 
1629     /* Now we loop on the constraints which need a change of basis */
1630     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1631     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1632     /* Change of basis matrix T computed via QR decomposition of constraints */
1633     if (qr_needed) {
1634       /* dual and primal dofs on a single cc */
1635       PetscInt     dual_dofs,primal_dofs;
1636       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1637       PetscInt     primal_counter;
1638       /* working stuff for GEQRF */
1639       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1640       PetscBLASInt lqr_work;
1641       /* working stuff for UNGQR */
1642       PetscScalar  *gqr_work,lgqr_work_t;
1643       PetscBLASInt lgqr_work;
1644       /* working stuff for TRTRS */
1645       PetscScalar  *trs_rhs;
1646       PetscBLASInt Blas_NRHS;
1647       /* pointers for values insertion into change of basis matrix */
1648       PetscInt     *start_rows,*start_cols;
1649       PetscScalar  *start_vals;
1650       /* working stuff for values insertion */
1651       PetscBT      is_primal;
1652 
1653       /* space to store Q */
1654       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1655       /* first we issue queries for optimal work */
1656       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1657       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1658       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1659       lqr_work = -1;
1660       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1661       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1662       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1663       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1664       lgqr_work = -1;
1665       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1666       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1667       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1668       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1669       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1670       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1671       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1672       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1673       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1674       /* array to store scaling factors for reflectors */
1675       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1676       /* array to store rhs and solution of triangular solver */
1677       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1678       /* array to store whether a node is primal or not */
1679       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1680       for (i=0;i<total_primal_vertices;i++) {
1681         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1682       }
1683 
1684       /* allocating workspace for check */
1685       if (pcbddc->dbg_flag) {
1686         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1687         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1688         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1689       }
1690 
1691       /* loop on constraints and see whether or not they need a change of basis */
1692       /* -> using implicit ordering contained in temp_indices data */
1693       total_counts = pcbddc->n_vertices;
1694       primal_counter = total_counts;
1695       while (total_counts<pcbddc->local_primal_size) {
1696         primal_dofs = 1;
1697         if (PetscBTLookup(change_basis,total_counts)) {
1698           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1699           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1700             primal_dofs++;
1701           }
1702           /* get constraint info */
1703           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1704           dual_dofs = size_of_constraint-primal_dofs;
1705 
1706           /* copy quadrature constraints for change of basis check */
1707           if (pcbddc->dbg_flag) {
1708             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1709             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1710           }
1711 
1712           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1713           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1714 
1715           /* compute QR decomposition of constraints */
1716           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1717           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1718           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1719           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1720           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1721           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1722           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1723 
1724           /* explictly compute R^-T */
1725           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1726           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1727           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1728           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1729           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1730           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1731           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1732           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1733           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1734           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1735 
1736           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1737           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1738           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1739           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1740           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1741           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1742           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1743           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1744           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1745 
1746           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1747              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1748              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1749           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1750           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1751           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1752           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1753           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1754           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1755           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1756           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1757           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1758           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1759 
1760           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1761           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1762           /* insert cols for primal dofs */
1763           for (j=0;j<primal_dofs;j++) {
1764             start_vals = &qr_basis[j*size_of_constraint];
1765             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1766             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1767           }
1768           /* insert cols for dual dofs */
1769           for (j=0,k=0;j<dual_dofs;k++) {
1770             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1771               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1772               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1773               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1774               j++;
1775             }
1776           }
1777 
1778           /* check change of basis */
1779           if (pcbddc->dbg_flag) {
1780             PetscInt   ii,jj;
1781             PetscBool valid_qr=PETSC_TRUE;
1782             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1783             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1784             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1785             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1786             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1787             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1788             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1789             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1790             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1791             for (jj=0;jj<size_of_constraint;jj++) {
1792               for (ii=0;ii<primal_dofs;ii++) {
1793                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1794                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1795               }
1796             }
1797             if (!valid_qr) {
1798               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1799               for (jj=0;jj<size_of_constraint;jj++) {
1800                 for (ii=0;ii<primal_dofs;ii++) {
1801                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1802                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1803                   }
1804                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1805                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1806                   }
1807                 }
1808               }
1809             } else {
1810               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1811             }
1812           }
1813           /* increment primal counter */
1814           primal_counter += primal_dofs;
1815         } else {
1816           if (pcbddc->dbg_flag) {
1817             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1818           }
1819         }
1820         /* increment constraint counter total_counts */
1821         total_counts += primal_dofs;
1822       }
1823       if (pcbddc->dbg_flag) {
1824         ierr = PetscFree(work);CHKERRQ(ierr);
1825       }
1826       /* free workspace */
1827       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1828       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1829       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1830       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1831       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1832       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1833     }
1834     /* assembling */
1835     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1836     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1837     /*
1838     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1839     */
1840   }
1841   if (pcbddc->dbg_flag) {
1842     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1843   }
1844   /* free workspace */
1845   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1846   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1847   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1848   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1849   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1850   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1851   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1852   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1858 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1859 {
1860   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1861   PC_IS       *pcis = (PC_IS*)pc->data;
1862   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1863   PetscInt    bs,ierr,i,vertex_size;
1864   PetscViewer viewer=pcbddc->dbg_viewer;
1865 
1866   PetscFunctionBegin;
1867   /* Init local Graph struct */
1868   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1869 
1870   /* Check validity of the csr graph passed in by the user */
1871   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1872     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1873   }
1874   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1875   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1876     Mat mat_adj;
1877     const PetscInt *xadj,*adjncy;
1878     PetscBool flg_row=PETSC_TRUE;
1879 
1880     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1881     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1882     if (!flg_row) {
1883       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1884     }
1885     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1886     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1887     if (!flg_row) {
1888       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1889     }
1890     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1891   }
1892 
1893   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1894   vertex_size = 1;
1895   if (!pcbddc->n_ISForDofs) {
1896     IS *custom_ISForDofs;
1897 
1898     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1899     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1900     for (i=0;i<bs;i++) {
1901       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1902     }
1903     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1904     /* remove my references to IS objects */
1905     for (i=0;i<bs;i++) {
1906       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1907     }
1908     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1909   } else { /* mat block size as vertex size (used for elasticity) */
1910     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1911   }
1912 
1913   /* Setup of Graph */
1914   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1915 
1916   /* Graph's connected components analysis */
1917   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1918 
1919   /* print some info to stdout */
1920   if (pcbddc->dbg_flag) {
1921     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1928 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1929 {
1930   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1931   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1932   PetscErrorCode ierr;
1933 
1934   PetscFunctionBegin;
1935   n = 0;
1936   vertices = 0;
1937   if (pcbddc->ConstraintMatrix) {
1938     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1939     for (i=0;i<local_primal_size;i++) {
1940       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1941       if (size_of_constraint == 1) n++;
1942       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1943     }
1944     if (vertices_idx) {
1945       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1946       n = 0;
1947       for (i=0;i<local_primal_size;i++) {
1948         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1949         if (size_of_constraint == 1) {
1950           vertices[n++]=row_cmat_indices[0];
1951         }
1952         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1953       }
1954     }
1955   }
1956   *n_vertices = n;
1957   if (vertices_idx) *vertices_idx = vertices;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1963 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1964 {
1965   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1966   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1967   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1968   PetscBT        touched;
1969   PetscErrorCode ierr;
1970 
1971     /* This function assumes that the number of local constraints per connected component
1972        is not greater than the number of nodes defined for the connected component
1973        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1974   PetscFunctionBegin;
1975   n = 0;
1976   constraints_index = 0;
1977   if (pcbddc->ConstraintMatrix) {
1978     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1979     max_size_of_constraint = 0;
1980     for (i=0;i<local_primal_size;i++) {
1981       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1982       if (size_of_constraint > 1) {
1983         n++;
1984       }
1985       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1986       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1987     }
1988     if (constraints_idx) {
1989       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1990       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1991       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1992       n = 0;
1993       for (i=0;i<local_primal_size;i++) {
1994         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1995         if (size_of_constraint > 1) {
1996           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1997           /* find first untouched local node */
1998           j = 0;
1999           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2000           min_index = row_cmat_global_indices[j];
2001           min_loc = j;
2002           /* search the minimum among nodes not yet touched on the connected component
2003              since there can be more than one constraint on a single cc */
2004           for (j=1;j<size_of_constraint;j++) {
2005             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2006               min_index = row_cmat_global_indices[j];
2007               min_loc = j;
2008             }
2009           }
2010           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2011           constraints_index[n++] = row_cmat_indices[min_loc];
2012         }
2013         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2014       }
2015       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2016       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2017     }
2018   }
2019   *n_constraints = n;
2020   if (constraints_idx) *constraints_idx = constraints_index;
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 /* the next two functions has been adapted from pcis.c */
2025 #undef __FUNCT__
2026 #define __FUNCT__ "PCBDDCApplySchur"
2027 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2028 {
2029   PetscErrorCode ierr;
2030   PC_IS          *pcis = (PC_IS*)(pc->data);
2031 
2032   PetscFunctionBegin;
2033   if (!vec2_B) { vec2_B = v; }
2034   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2035   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2036   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2037   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2038   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2044 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2045 {
2046   PetscErrorCode ierr;
2047   PC_IS          *pcis = (PC_IS*)(pc->data);
2048 
2049   PetscFunctionBegin;
2050   if (!vec2_B) { vec2_B = v; }
2051   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2052   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2053   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2054   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2055   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "PCBDDCSubsetNumbering"
2061 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2062 {
2063   Vec            local_vec,global_vec;
2064   IS             seqis,paris;
2065   VecScatter     scatter_ctx;
2066   PetscScalar    *array;
2067   PetscInt       *temp_global_dofs;
2068   PetscScalar    globalsum;
2069   PetscInt       i,j,s;
2070   PetscInt       nlocals,first_index,old_index,max_local;
2071   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2072   PetscMPIInt    *dof_sizes,*dof_displs;
2073   PetscBool      first_found;
2074   PetscErrorCode ierr;
2075 
2076   PetscFunctionBegin;
2077   /* mpi buffers */
2078   MPI_Comm_size(comm,&size_prec_comm);
2079   MPI_Comm_rank(comm,&rank_prec_comm);
2080   j = ( !rank_prec_comm ? size_prec_comm : 0);
2081   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2082   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2083   /* get maximum size of subset */
2084   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2085   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2086   max_local = 0;
2087   if (n_local_dofs) {
2088     max_local = temp_global_dofs[0];
2089     for (i=1;i<n_local_dofs;i++) {
2090       if (max_local < temp_global_dofs[i] ) {
2091         max_local = temp_global_dofs[i];
2092       }
2093     }
2094   }
2095   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2096   max_global++;
2097   max_local = 0;
2098   if (n_local_dofs) {
2099     max_local = local_dofs[0];
2100     for (i=1;i<n_local_dofs;i++) {
2101       if (max_local < local_dofs[i] ) {
2102         max_local = local_dofs[i];
2103       }
2104     }
2105   }
2106   max_local++;
2107   /* allocate workspace */
2108   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2109   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2110   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2111   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2112   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2113   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2114   /* create scatter */
2115   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2116   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2117   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2118   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2119   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2120   /* init array */
2121   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2122   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2123   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2124   if (local_dofs_mult) {
2125     for (i=0;i<n_local_dofs;i++) {
2126       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2127     }
2128   } else {
2129     for (i=0;i<n_local_dofs;i++) {
2130       array[local_dofs[i]]=1.0;
2131     }
2132   }
2133   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2134   /* scatter into global vec and get total number of global dofs */
2135   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2136   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2137   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2138   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2139   /* Fill global_vec with cumulative function for global numbering */
2140   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2141   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2142   nlocals = 0;
2143   first_index = -1;
2144   first_found = PETSC_FALSE;
2145   for (i=0;i<s;i++) {
2146     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2147       first_found = PETSC_TRUE;
2148       first_index = i;
2149     }
2150     nlocals += (PetscInt)PetscRealPart(array[i]);
2151   }
2152   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2153   if (!rank_prec_comm) {
2154     dof_displs[0]=0;
2155     for (i=1;i<size_prec_comm;i++) {
2156       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2157     }
2158   }
2159   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2160   if (first_found) {
2161     array[first_index] += (PetscScalar)nlocals;
2162     old_index = first_index;
2163     for (i=first_index+1;i<s;i++) {
2164       if (PetscRealPart(array[i]) > 0.0) {
2165         array[i] += array[old_index];
2166         old_index = i;
2167       }
2168     }
2169   }
2170   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2171   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2172   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2173   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2174   /* get global ordering of local dofs */
2175   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2176   if (local_dofs_mult) {
2177     for (i=0;i<n_local_dofs;i++) {
2178       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2179     }
2180   } else {
2181     for (i=0;i<n_local_dofs;i++) {
2182       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2183     }
2184   }
2185   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2186   /* free workspace */
2187   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2188   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2189   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2190   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2191   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2192   /* return pointer to global ordering of local dofs */
2193   *global_numbering_subset = temp_global_dofs;
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 #undef __FUNCT__
2198 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2199 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2200 {
2201   PetscInt       i,j;
2202   PetscScalar    *alphas;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206   /* this implements stabilized Gram-Schmidt */
2207   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2208   for (i=0;i<n;i++) {
2209     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2210     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2211     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2212   }
2213   ierr = PetscFree(alphas);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2218 #undef __FUNCT__
2219 #define __FUNCT__ "MatConvert_IS_AIJ"
2220 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2221 {
2222   Mat new_mat;
2223   Mat_IS *matis = (Mat_IS*)(mat->data);
2224   /* info on mat */
2225   PetscInt bs,rows,cols;
2226   PetscInt lrows,lcols;
2227   PetscInt local_rows,local_cols;
2228   PetscMPIInt nsubdomains;
2229   /* preallocation */
2230   Vec vec_dnz,vec_onz;
2231   PetscScalar *my_dnz,*my_onz,*array;
2232   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2233   PetscInt  index_row,index_col,owner;
2234   PetscInt  *local_indices,*global_indices;
2235   /* work */
2236   PetscInt i,j;
2237   PetscErrorCode ierr;
2238 
2239   PetscFunctionBegin;
2240   /* CHECKS */
2241   /* get info from mat */
2242   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2243   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2244   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2245   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2246 
2247   /* MAT_INITIAL_MATRIX */
2248   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2249   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2250   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2251   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2252   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2253   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2254 
2255   /*
2256     preallocation
2257   */
2258 
2259   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2260   /*
2261      Some vectors are needed to sum up properly on shared interface dofs.
2262      Note that preallocation is not exact, since it overestimates nonzeros
2263   */
2264 /*
2265   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2266   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2267   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2268   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2269 */
2270   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2271   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2272   /* All processes need to compute entire row ownership */
2273   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2274   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2275   for (i=0;i<nsubdomains;i++) {
2276     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2277       row_ownership[j]=i;
2278     }
2279   }
2280   /* map indices of local mat to global values */
2281   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2282   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2283   for (i=0;i<local_rows;i++) local_indices[i]=i;
2284   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2285   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2286 
2287   /*
2288      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2289      then, they will be summed up properly. This way, preallocation is always sufficient
2290   */
2291   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2292   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2293   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2294   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2295   for (i=0;i<local_rows;i++) {
2296     index_row = global_indices[i];
2297     for (j=i;j<local_rows;j++) {
2298       owner = row_ownership[index_row];
2299       index_col = global_indices[j];
2300       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2301         my_dnz[i] += 1.0;
2302       } else { /* offdiag block */
2303         my_onz[i] += 1.0;
2304       }
2305       /* same as before, interchanging rows and cols */
2306       if (i != j) {
2307         owner = row_ownership[index_col];
2308         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2309           my_dnz[j] += 1.0;
2310         } else {
2311           my_onz[j] += 1.0;
2312         }
2313       }
2314     }
2315   }
2316   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2317   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2318   if (local_rows) { /* multilevel guard */
2319     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2320     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2321   }
2322   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2323   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2324   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2325   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2326   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2327   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2328   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2329 
2330   /* set computed preallocation in dnz and onz */
2331   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2332   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2333   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2334   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2335   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2336   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2337   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2338   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2339 
2340   /* Resize preallocation if overestimated */
2341   for (i=0;i<lrows;i++) {
2342     dnz[i] = PetscMin(dnz[i],lcols);
2343     onz[i] = PetscMin(onz[i],cols-lcols);
2344   }
2345   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2346   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2347 
2348   /*
2349     Set values. Very Basic.
2350   */
2351   for (i=0;i<local_rows;i++) {
2352     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2353     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2354     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2355     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2356     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2357   }
2358   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361 
2362   /* get back mat */
2363   *M = new_mat;
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatISSubassemble_Private"
2369 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2370 {
2371   Mat             subdomain_adj;
2372   IS              new_ranks,ranks_send_to;
2373   MatPartitioning partitioner;
2374   Mat_IS          *matis;
2375   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2376   PetscInt        prank;
2377   PetscMPIInt     size,rank,color;
2378   PetscInt        *xadj,*adjncy,*oldranks;
2379   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2380   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2381   PetscErrorCode  ierr;
2382   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2383   PetscSubcomm    subcomm;
2384 
2385   PetscFunctionBegin;
2386   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2387   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2388   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2389 
2390   /* Get info on mapping */
2391   matis = (Mat_IS*)(mat->data);
2392   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2393   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2394 
2395   /* build local CSR graph of subdomains' connectivity */
2396   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2397   xadj[0] = 0;
2398   xadj[1] = PetscMax(n_neighs-1,0);
2399   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2400   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2401 
2402   if (threshold) {
2403     PetscInt* count,min_threshold;
2404     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2405     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2406     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2407       for (j=0;j<n_shared[i];j++) {
2408         count[shared[i][j]] += 1;
2409       }
2410     }
2411     /* adapt threshold since we dont want to lose significant connections */
2412     min_threshold = n_neighs;
2413     for (i=1;i<n_neighs;i++) {
2414       for (j=0;j<n_shared[i];j++) {
2415         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2416       }
2417     }
2418     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2419     threshold = PetscMax(min_threshold+1,threshold);
2420     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2421     xadj[1] = 0;
2422     for (i=1;i<n_neighs;i++) {
2423       for (j=0;j<n_shared[i];j++) {
2424         if (count[shared[i][j]] < threshold) {
2425           adjncy[xadj[1]] = neighs[i];
2426           adjncy_wgt[xadj[1]] = n_shared[i];
2427           xadj[1]++;
2428           break;
2429         }
2430       }
2431     }
2432     ierr = PetscFree(count);CHKERRQ(ierr);
2433   } else {
2434     if (xadj[1]) {
2435       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2436       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2437     }
2438   }
2439   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2440   if (use_square) {
2441     for (i=0;i<xadj[1];i++) {
2442       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2443     }
2444   }
2445   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2446 
2447   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2448 
2449   /*
2450     Restrict work on active processes only.
2451   */
2452   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2453   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2454   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2455   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2456   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr);
2457   if (color) {
2458     ierr = PetscFree(xadj);CHKERRQ(ierr);
2459     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2460     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2461   } else {
2462     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2463     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2464     prank = rank;
2465     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2466     for (i=0;i<size;i++) {
2467       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2468     }
2469     for (i=0;i<xadj[1];i++) {
2470       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2471     }
2472     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2473     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2474     n_subdomains = (PetscInt)size;
2475     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2476 
2477     /* Partition */
2478     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2479     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2480     if (use_vwgt) {
2481       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2482       v_wgt[0] = local_size;
2483       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2484     }
2485     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2486     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2487     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2488     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2489     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2490 
2491     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2492     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2493     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2494     /* clean up */
2495     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2496     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2497     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2498     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2499   }
2500   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2501 
2502   /* assemble parallel IS for sends */
2503   i = 1;
2504   if (color) i=0;
2505   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2506   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2507 
2508   /* get back IS */
2509   *is_sends = ranks_send_to;
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2514 
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatISSubassemble"
2517 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2518 {
2519   Mat                    local_mat,new_mat;
2520   Mat_IS                 *matis;
2521   IS                     is_sends_internal;
2522   PetscInt               rows,cols;
2523   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2524   PetscBool              ismatis,isdense;
2525   ISLocalToGlobalMapping l2gmap;
2526   PetscInt*              l2gmap_indices;
2527   const PetscInt*        is_indices;
2528   MatType                new_local_type;
2529   MatTypePrivate         new_local_type_private;
2530   /* buffers */
2531   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2532   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2533   /* MPI */
2534   MPI_Comm               comm;
2535   PetscMPIInt            n_sends,n_recvs,commsize;
2536   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2537   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2538   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2539   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2540   PetscErrorCode         ierr;
2541 
2542   PetscFunctionBegin;
2543   /* checks */
2544   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2545   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2546   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2547   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2548   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2549   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2550   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2551   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2552   PetscValidLogicalCollectiveInt(mat,bs,0);
2553   /* prepare IS for sending if not provided */
2554   if (!is_sends) {
2555     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2556     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2557   } else {
2558     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2559     is_sends_internal = is_sends;
2560   }
2561 
2562   /* get pointer of MATIS data */
2563   matis = (Mat_IS*)mat->data;
2564 
2565   /* get comm */
2566   comm = PetscObjectComm((PetscObject)mat);
2567 
2568   /* compute number of sends */
2569   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2570   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2571 
2572   /* compute number of receives */
2573   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2574   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2575   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2576   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2577   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2578   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2579   ierr = PetscFree(iflags);CHKERRQ(ierr);
2580 
2581   /* prepare send/receive buffers */
2582   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2583   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2584   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2585   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2586 
2587   /* Get data from local mat */
2588   if (!isdense) {
2589     /* TODO: See below some guidelines on how to prepare the local buffers */
2590     /*
2591        send_buffer_vals should contain the raw values of the local matrix
2592        send_buffer_idxs should contain:
2593        - MatType_PRIVATE type
2594        - PetscInt        size_of_l2gmap
2595        - PetscInt        global_row_indices[size_of_l2gmap]
2596        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2597     */
2598   } else {
2599     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2600     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2601     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2602     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2603     send_buffer_idxs[1] = i;
2604     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2605     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2606     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2607     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2608     for (i=0;i<n_sends;i++) {
2609       ilengths_vals[is_indices[i]] = len*len;
2610       ilengths_idxs[is_indices[i]] = len+2;
2611     }
2612   }
2613   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2614   buf_size_idxs = 0;
2615   buf_size_vals = 0;
2616   for (i=0;i<n_recvs;i++) {
2617     buf_size_idxs += (PetscInt)olengths_idxs[i];
2618     buf_size_vals += (PetscInt)olengths_vals[i];
2619   }
2620   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2621   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2622 
2623   /* get new tags for clean communications */
2624   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2625   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2626 
2627   /* allocate for requests */
2628   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2629   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2630   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2631   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2632 
2633   /* communications */
2634   ptr_idxs = recv_buffer_idxs;
2635   ptr_vals = recv_buffer_vals;
2636   for (i=0;i<n_recvs;i++) {
2637     source_dest = onodes[i];
2638     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2639     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2640     ptr_idxs += olengths_idxs[i];
2641     ptr_vals += olengths_vals[i];
2642   }
2643   for (i=0;i<n_sends;i++) {
2644     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2645     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2646     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2647   }
2648   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2649   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2650 
2651   /* assemble new l2g map */
2652   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2653   ptr_idxs = recv_buffer_idxs;
2654   buf_size_idxs = 0;
2655   for (i=0;i<n_recvs;i++) {
2656     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2657     ptr_idxs += olengths_idxs[i];
2658   }
2659   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2660   ptr_idxs = recv_buffer_idxs;
2661   buf_size_idxs = 0;
2662   for (i=0;i<n_recvs;i++) {
2663     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2664     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2665     ptr_idxs += olengths_idxs[i];
2666   }
2667   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2668   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2669   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2670 
2671   /* infer new local matrix type from received local matrices type */
2672   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2673   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2674   new_local_type_private = MATAIJ_PRIVATE;
2675   new_local_type = MATSEQAIJ;
2676   if (n_recvs) {
2677     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2678     ptr_idxs = recv_buffer_idxs;
2679     for (i=0;i<n_recvs;i++) {
2680       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2681         new_local_type_private = MATAIJ_PRIVATE;
2682         break;
2683       }
2684       ptr_idxs += olengths_idxs[i];
2685     }
2686     switch (new_local_type_private) {
2687       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2688         new_local_type = MATSEQAIJ;
2689         bs = 1;
2690         break;
2691       case MATAIJ_PRIVATE:
2692         new_local_type = MATSEQAIJ;
2693         bs = 1;
2694         break;
2695       case MATBAIJ_PRIVATE:
2696         new_local_type = MATSEQBAIJ;
2697         break;
2698       case MATSBAIJ_PRIVATE:
2699         new_local_type = MATSEQSBAIJ;
2700         break;
2701       default:
2702         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2703         break;
2704     }
2705   }
2706 
2707   /* create MATIS object */
2708   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2709   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2710   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2711   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2712   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2713   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2714 
2715   /* set values */
2716   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2717   ptr_vals = recv_buffer_vals;
2718   ptr_idxs = recv_buffer_idxs;
2719   for (i=0;i<n_recvs;i++) {
2720     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2721       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2722       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2723       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2724       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2725       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2726     }
2727     ptr_idxs += olengths_idxs[i];
2728     ptr_vals += olengths_vals[i];
2729   }
2730   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2731   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2732   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2733   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2734 
2735   { /* check */
2736     Vec       lvec,rvec;
2737     PetscReal infty_error;
2738 
2739     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2740     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2741     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2742     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2743     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2744     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2745     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2746     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2747     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2748   }
2749 
2750   /* free workspace */
2751   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2752   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2753   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2754   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2755   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2756   if (isdense) {
2757     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2758     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2759   } else {
2760     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2761   }
2762   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2763   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2764   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2765   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2766   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2767   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2768   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2769   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2770   ierr = PetscFree(onodes);CHKERRQ(ierr);
2771   /* get back new mat */
2772   *mat_n = new_mat;
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 #undef __FUNCT__
2777 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2778 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2779 {
2780   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2781   PC_IS                  *pcis = (PC_IS*)pc->data;
2782   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2783   MatNullSpace           CoarseNullSpace=NULL;
2784   ISLocalToGlobalMapping coarse_islg;
2785   IS                     coarse_is;
2786   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2787   PetscInt               im_active=-1,active_procs=-1;
2788   PC                     pc_temp;
2789   PCType                 coarse_pc_type;
2790   KSPType                coarse_ksp_type;
2791   PetscBool              multilevel_requested,multilevel_allowed;
2792   PetscBool              setsym,issym,isbddc,isnn;
2793   MatStructure           matstruct;
2794   PetscErrorCode         ierr;
2795 
2796   PetscFunctionBegin;
2797   /* Assign global numbering to coarse dofs */
2798   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2799 
2800   /* infer some info from user */
2801   issym = PETSC_FALSE;
2802   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2803   multilevel_allowed = PETSC_FALSE;
2804   multilevel_requested = PETSC_FALSE;
2805   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2806   if (multilevel_requested) {
2807     /* count "active processes" */
2808     im_active = !!(pcis->n);
2809     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2810     if (active_procs/pcbddc->coarsening_ratio < 2) {
2811       multilevel_allowed = PETSC_FALSE;
2812     } else {
2813       multilevel_allowed = PETSC_TRUE;
2814     }
2815   }
2816 
2817   /* set defaults for coarse KSP and PC */
2818   if (multilevel_allowed) {
2819     if (issym) {
2820       coarse_ksp_type = KSPRICHARDSON;
2821     } else {
2822       coarse_ksp_type = KSPCHEBYSHEV;
2823     }
2824     coarse_pc_type = PCBDDC;
2825   } else {
2826     coarse_ksp_type = KSPPREONLY;
2827     coarse_pc_type = PCREDUNDANT;
2828   }
2829 
2830   /* create the coarse KSP object only once with defaults */
2831   if (!pcbddc->coarse_ksp) {
2832     char prefix[256],str_level[3];
2833     size_t len;
2834     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2835     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2836     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2837     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2838     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2839     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2840     /* prefix */
2841     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2842     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2843     if (!pcbddc->current_level) {
2844       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2845       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2846     } else {
2847       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2848       if (pcbddc->current_level>1) len -= 2;
2849       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
2850       *(prefix+len)='\0';
2851       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
2852       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
2853     }
2854     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
2855   }
2856   /* allow user customization */
2857   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2858   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2859   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2860 
2861   /* get some info after set from options */
2862   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2863   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
2864   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2865   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
2866     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2867     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2868     isbddc = PETSC_FALSE;
2869   }
2870 
2871   /* propagate BDDC info to the next level */
2872   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2873   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2874   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2875 
2876   /* creates temporary MATIS object for coarse matrix */
2877   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2878   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2879   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2880   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2881   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2882   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2884   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2885   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2886   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2887 
2888   /* assemble coarse matrix */
2889   if (isbddc || isnn) {
2890     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2891   } else {
2892     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2893   }
2894   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2895 
2896   /* create local to global scatters for coarse problem */
2897   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2898   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2899   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2900 
2901   /* propagate symmetry info to coarse matrix */
2902   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2903 
2904   /* Compute coarse null space (special handling by BDDC only) */
2905   if (pcbddc->NullSpace) {
2906     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2907     if (isbddc) {
2908       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2909     } else {
2910       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2911     }
2912   }
2913 
2914   /* set operators */
2915   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
2916   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
2917 
2918   /* additional KSP customization */
2919   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2920   if (max_it < 5) {
2921     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2922   }
2923   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
2924 
2925 
2926   /* print some info if requested */
2927   if (pcbddc->dbg_flag) {
2928     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
2929     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
2930     if (!multilevel_allowed) {
2931       if (multilevel_requested) {
2932         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2933       } else if (pcbddc->max_levels) {
2934         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
2935       }
2936     }
2937     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
2938     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2939   }
2940 
2941   /* setup coarse ksp */
2942   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2943   if (pcbddc->dbg_flag) {
2944     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
2945     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2946     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
2947     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2948   }
2949 
2950   /* Check coarse problem if requested */
2951   if (pcbddc->dbg_flag) {
2952     KSP       check_ksp;
2953     KSPType   check_ksp_type;
2954     PC        check_pc;
2955     Vec       check_vec;
2956     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
2957     PetscInt  its;
2958     PetscBool ispreonly,compute;
2959 
2960     /* Create ksp object suitable for estimation of extreme eigenvalues */
2961     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
2962     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2963     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
2964     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
2965     if (ispreonly) {
2966       check_ksp_type = KSPPREONLY;
2967       compute = PETSC_FALSE;
2968     } else {
2969       if (issym) check_ksp_type = KSPCG;
2970       else check_ksp_type = KSPGMRES;
2971       compute = PETSC_TRUE;
2972     }
2973     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
2974     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
2975     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
2976     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
2977     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
2978     /* create random vec */
2979     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
2980     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
2981     if (CoarseNullSpace) {
2982       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
2983     }
2984     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2985     /* solve coarse problem */
2986     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
2987     if (CoarseNullSpace) {
2988       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
2989     }
2990     /* check coarse problem residual error */
2991     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
2992     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2993     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2994     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
2995     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
2996     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
2997     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
2998     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
2999     /* get eigenvalue estimation if preonly has not been requested */
3000     if (!ispreonly) {
3001       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3002       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3003       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3004     }
3005     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3006     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3007   }
3008   /* free memory */
3009   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3010   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3016 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3017 {
3018   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3019   PC_IS*         pcis = (PC_IS*)pc->data;
3020   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3021   PetscInt       i,j,coarse_size;
3022   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3023   PetscErrorCode ierr;
3024 
3025   PetscFunctionBegin;
3026   /* get indices in local ordering for vertices and constraints */
3027   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3028   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3029   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3030   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3031   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3032   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3033   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3034 
3035   /* Compute global number of coarse dofs */
3036   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3037 
3038   /* check numbering */
3039   if (pcbddc->dbg_flag) {
3040     PetscScalar coarsesum,*array;
3041 
3042     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3043     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3044     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3045     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3046     for (i=0;i<pcbddc->local_primal_size;i++) {
3047       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3048     }
3049     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3050     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3051     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3052     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3053     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3054     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3055     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3056     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3057     for (i=0;i<pcis->n;i++) {
3058       if (array[i] == 1.0) {
3059         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3060       }
3061     }
3062     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3063     for (i=0;i<pcis->n;i++) {
3064       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3065     }
3066     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3067     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3068     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3069     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3070     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3071     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3072     if (pcbddc->dbg_flag > 1) {
3073       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3074       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3075       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3076       for (i=0;i<pcbddc->local_primal_size;i++) {
3077         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3078       }
3079       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3080     }
3081     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3082   }
3083   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3084   /* get back data */
3085   *coarse_size_n = coarse_size;
3086   *local_primal_indices_n = local_primal_indices;
3087   PetscFunctionReturn(0);
3088 }
3089 
3090