xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision c7317a1961f7de1514a675ae6a7b8e3ff355d813)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
166   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183 
184   /* Allocating some extra storage just to be safe */
185   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
186   for (i=0;i<pcis->n;i++) auxindices[i]=i;
187 
188   /* vertices in boundary numbering */
189   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
191   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
193   if (i != n_vertices) {
194     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
195   }
196 
197   /* Precompute stuffs needed for preprocessing and application of BDDC*/
198   if (n_constraints) {
199     /* work vectors */
200     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
201     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
202     /* auxiliary matrices */
203     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
204     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
205     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
206     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
207 
208     /* Extract constraints on R nodes: C_{CR}  */
209     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
210     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
211     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
212 
213     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
214     for (i=0;i<n_constraints;i++) {
215       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
216       /* Get row of constraint matrix in R numbering */
217       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
219       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
221       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
222       /* Solve for row of constraint matrix in R numbering */
223       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
224       /* Set values in local_auxmat2 */
225       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
227       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228     }
229     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
232 
233     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
234     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
235     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
236     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
237     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
238     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
239     ierr = MatSetUp(M1);CHKERRQ(ierr);
240     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
241     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
242     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
243     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
244     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
245     ierr = MatDestroy(&M2);CHKERRQ(ierr);
246     ierr = MatDestroy(&M3);CHKERRQ(ierr);
247     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
248     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
249   }
250 
251   /* Get submatrices from subdomain matrix */
252   if (n_vertices) {
253     PetscInt ibs,mbs;
254     PetscBool issbaij;
255     Mat newmat;
256 
257     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
258     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
259     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
260     if (ibs != mbs) { /* need to convert to SEQAIJ */
261       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
263       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
265       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
266     } else {
267       /* this is safe */
268       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
269       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
270       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
271         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272         /* which of the two approaches is faster? */
273         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
275         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
277         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
278       } else {
279         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
280         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
281       }
282     }
283     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
284     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
285     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
286   }
287 
288   /* Matrix of coarse basis functions (local) */
289   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
290   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
291   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
292   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
293   if (pcbddc->switch_static || pcbddc->dbg_flag) {
294     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
295     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
296     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
297     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
298   }
299 
300   if (pcbddc->dbg_flag) {
301     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
303     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
304   }
305   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
306   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
307 
308   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
309 
310   /* vertices */
311   for (i=0;i<n_vertices;i++) {
312     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
313     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
314     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
315     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
317     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
318     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
319     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
320     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
321     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
322     if (n_constraints) {
323       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
324       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
325       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
326     }
327     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
328     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
329 
330     /* Set values in coarse basis function and subdomain part of coarse_mat */
331     /* coarse basis functions */
332     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
333     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
335     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
337     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
339     if (pcbddc->switch_static || pcbddc->dbg_flag) {
340       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
341       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
342       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345     }
346     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
347     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
349     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     if (n_constraints) {
351       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354     }
355 
356     /* check */
357     if (pcbddc->dbg_flag) {
358       /* assemble subdomain vector on local nodes */
359       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
365       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
366       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
367       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
368       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
370       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       if (n_constraints) {
372         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
373         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
374         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
375       }
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
379       /* check saddle point solution */
380       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
381       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
382       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
383       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
384       /* shift by the identity matrix */
385       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
386       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
387       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   /* constraints */
393   for (i=0;i<n_constraints;i++) {
394     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
395     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
397     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
398     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
399     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
400     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
401     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
402     if (n_vertices) {
403       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
404     }
405     /* Set values in coarse basis function and subdomain part of coarse_mat */
406     /* coarse basis functions */
407     j = i+n_vertices; /* don't touch this! */
408     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
409     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
410     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
411     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     if (pcbddc->switch_static || pcbddc->dbg_flag) {
415       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420     }
421     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
422     if (n_vertices) {
423       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     }
427     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
428     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
429     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
430 
431     if (pcbddc->dbg_flag) {
432       /* assemble subdomain vector on nodes */
433       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
434       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
439       /* assemble subdomain vector of lagrange multipliers */
440       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
441       if (n_vertices) {
442         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
443         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
444         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
445       }
446       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
448       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
452       /* check saddle point solution */
453       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
454       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
455       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
456       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
457       /* shift by the identity matrix */
458       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
459       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
460       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
462     }
463   }
464   /* call assembling routines for local coarse basis */
465   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   if (pcbddc->switch_static || pcbddc->dbg_flag) {
468     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471 
472   /* compute other basis functions for non-symmetric problems */
473   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
474   if (!setsym || (setsym && !issym)) {
475     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
477     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
478     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
481       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
482       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
483       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
484     }
485     for (i=0;i<pcbddc->local_primal_size;i++) {
486       if (n_constraints) {
487         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
488         for (j=0;j<n_constraints;j++) {
489           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
490         }
491         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
492         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
493       }
494       if (i<n_vertices) {
495         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
496         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
497         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
498         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
499         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
500         if (n_constraints) {
501           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
502         }
503       } else {
504         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
505       }
506       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       if (i<n_vertices) {
514         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
515       }
516       if (pcbddc->switch_static || pcbddc->dbg_flag) {
517         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522       }
523 
524       if (pcbddc->dbg_flag) {
525         /* assemble subdomain vector on nodes */
526         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
527         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
529         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         if (i<n_vertices) {
531           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
532         }
533         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
534         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
535         /* assemble subdomain vector of lagrange multipliers */
536         for (j=0;j<pcbddc->local_primal_size;j++) {
537           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
538         }
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         /* check saddle point solution */
542         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
543         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
544         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
545         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
546         /* shift by the identity matrix */
547         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
548         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
549         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
550         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
551       }
552     }
553     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     if (pcbddc->switch_static || pcbddc->dbg_flag) {
556       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     }
559     unsymmetric_check = PETSC_TRUE;
560   } else { /* take references to already computed coarse basis */
561     unsymmetric_check = PETSC_FALSE;
562     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
563     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
564     if (pcbddc->coarse_phi_D) {
565       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
566       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
567     }
568   }
569   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
570   /* Checking coarse_sub_mat and coarse basis functios */
571   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
572   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
573   if (pcbddc->dbg_flag) {
574     Mat         coarse_sub_mat;
575     Mat         AUXMAT,TM1,TM2,TM3,TM4;
576     Mat         coarse_phi_D,coarse_phi_B;
577     Mat         coarse_psi_D,coarse_psi_B;
578     Mat         A_II,A_BB,A_IB,A_BI;
579     MatType     checkmattype=MATSEQAIJ;
580     PetscReal   real_value;
581 
582     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
583     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
584     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
585     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
586     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
587     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
588     if (unsymmetric_check) {
589       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
590       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
591     }
592     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
593 
594     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
596     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
597     if (unsymmetric_check) {
598       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
602       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
608       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
610     } else {
611       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
612       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
613       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
614       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
615       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
616       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
617       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
618       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
619     }
620     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
621     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
622     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
623     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
624     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
625     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
626     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
627     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
628     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
629     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
630     for (i=0;i<pcbddc->local_primal_size;i++) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
632     }
633     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
634     for (i=0;i<pcbddc->local_primal_size;i++) {
635       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
636     }
637     if (unsymmetric_check) {
638       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
639       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
640         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
641       }
642       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
643       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
644         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
645       }
646     }
647     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
648     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
649     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
652     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
653     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
654     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
655     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
656     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
657     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
658     if (unsymmetric_check) {
659       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
660       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
661     }
662     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
663     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
664     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
665     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
666   }
667   /* free memory */
668   if (n_vertices) {
669     ierr = PetscFree(vertices);CHKERRQ(ierr);
670     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
671     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
672     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
673     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
674     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
675   }
676   if (n_constraints) {
677     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
678     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
679     ierr = MatDestroy(&M1);CHKERRQ(ierr);
680     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
681   }
682   ierr = PetscFree(auxindices);CHKERRQ(ierr);
683   /* get back data */
684   *coarse_submat_vals_n = coarse_submat_vals;
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
691 {
692   PC_IS*            pcis = (PC_IS*)(pc->data);
693   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
694   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
695   PetscBool         issbaij,isbaij;
696   /* manage repeated solves */
697   MatReuse          reuse;
698   MatStructure      matstruct;
699   PetscErrorCode    ierr;
700 
701   PetscFunctionBegin;
702   /* get mat flags */
703   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
704   reuse = MAT_INITIAL_MATRIX;
705   if (pc->setupcalled) {
706     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
707     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
708     if (matstruct == SAME_NONZERO_PATTERN) {
709       reuse = MAT_REUSE_MATRIX;
710     } else {
711       reuse = MAT_INITIAL_MATRIX;
712     }
713   }
714   if (reuse == MAT_INITIAL_MATRIX) {
715     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
716     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
717     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
718     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
719     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
720   }
721 
722   /* transform local matrices if needed */
723   if (pcbddc->use_change_of_basis) {
724     Mat         change_mat_all;
725     PetscScalar *row_cmat_values;
726     PetscInt    *row_cmat_indices;
727     PetscInt    *nnz,*is_indices,*temp_indices;
728     PetscInt    i,j,k,n_D,n_B;
729 
730     /* Get Non-overlapping dimensions */
731     n_B = pcis->n_B;
732     n_D = pcis->n-n_B;
733 
734     /* compute nonzero structure of change of basis on all local nodes */
735     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
736     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
738     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     k=1;
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
743       nnz[is_indices[i]]=j;
744       if (k < j) k = j;
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
746     }
747     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     /* assemble change of basis matrix on the whole set of local dofs */
749     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
750     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
751     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
752     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
753     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
754     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
755     for (i=0;i<n_D;i++) {
756       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
757     }
758     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
759     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
760     for (i=0;i<n_B;i++) {
761       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
762       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
763       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
764       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
765     }
766     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
768     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
769     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
770     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
771     if (!issbaij && !isbaij) {
772       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
773     } else {
774       Mat work_mat;
775       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
776       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
777       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
778     }
779     /*
780     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
781     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
782     */
783     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
784     ierr = PetscFree(nnz);CHKERRQ(ierr);
785     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
786   } else {
787     /* without change of basis, the local matrix is unchanged */
788     if (!pcbddc->local_mat) {
789       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
790       pcbddc->local_mat = matis->A;
791     }
792   }
793 
794   /* get submatrices */
795   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
796   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
797   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
798   if (!issbaij) {
799     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
800     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
801   } else {
802     Mat newmat;
803     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
804     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
805     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
806     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
814 {
815   PC_IS*         pcis = (PC_IS*)(pc->data);
816   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
817   IS             is_aux1,is_aux2;
818   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
819   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
820   PetscInt       vbs,bs;
821   PetscBT        bitmask;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   /* Set Non-overlapping dimensions */
826   n_B = pcis->n_B; n_D = pcis->n - n_B;
827   /* get vertex indices from constraint matrix */
828   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
829 
830   /* create auxiliary bitmask */
831   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
832   for (i=0;i<n_vertices;i++) {
833     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
834   }
835 
836   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
837   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
838   for (i=0, n_R=0; i<pcis->n; i++) {
839     if (!PetscBTLookup(bitmask,i)) {
840       idx_R_local[n_R] = i;
841       n_R++;
842     }
843   }
844 
845   /* Block code */
846   vbs = 1;
847   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
848   if (bs>1 && !(n_vertices%bs)) {
849     PetscBool is_blocked = PETSC_TRUE;
850     PetscInt  *vary;
851     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
852     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
853     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
854     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
855     for (i=0; i<n_vertices; i++) {
856       if (vary[i]!=0 && vary[i]!=bs) {
857         is_blocked = PETSC_FALSE;
858         break;
859       }
860     }
861     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
862       vbs = bs;
863       for (i=0;i<n_R/vbs;i++) {
864         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
865       }
866     }
867     ierr = PetscFree(vary);CHKERRQ(ierr);
868   }
869   ierr = PetscFree(vertices);CHKERRQ(ierr);
870   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
871   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
872 
873   /* print some info if requested */
874   if (pcbddc->dbg_flag) {
875     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
876     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
877     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
878     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
879     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
880     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
881     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
882   }
883 
884   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
885   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
886   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
887   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
888   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
889   for (i=0; i<n_D; i++) {
890     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
891   }
892   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
893   for (i=0, j=0; i<n_R; i++) {
894     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
895       aux_array1[j++] = i;
896     }
897   }
898   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
899   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
900   for (i=0, j=0; i<n_B; i++) {
901     if (!PetscBTLookup(bitmask,is_indices[i])) {
902       aux_array2[j++] = i;
903     }
904   }
905   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
906   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
907   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
908   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
909   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
910 
911   if (pcbddc->switch_static || pcbddc->dbg_flag) {
912     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
913     for (i=0, j=0; i<n_R; i++) {
914       if (PetscBTLookup(bitmask,idx_R_local[i])) {
915         aux_array1[j++] = i;
916       }
917     }
918     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
919     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
920     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
921   }
922   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
923   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
931 {
932   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
933   PC_IS          *pcis = (PC_IS*)pc->data;
934   PC             pc_temp;
935   Mat            A_RR;
936   Vec            vec1,vec2,vec3;
937   MatStructure   matstruct;
938   PetscScalar    m_one = -1.0;
939   PetscReal      value;
940   PetscInt       n_D,n_R,ibs,mbs;
941   PetscBool      use_exact,use_exact_reduced,issbaij;
942   PetscErrorCode ierr;
943   /* prefixes stuff */
944   char           dir_prefix[256],neu_prefix[256],str_level[3];
945   size_t         len;
946 
947   PetscFunctionBegin;
948   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
949 
950   /* compute prefixes */
951   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
952   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
953   if (!pcbddc->current_level) {
954     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
955     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
956     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
957     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
958   } else {
959     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
960     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
961     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
962     len -= 15; /* remove "pc_bddc_coarse_" */
963     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
964     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
965     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
966     *(dir_prefix+len)='\0';
967     *(neu_prefix+len)='\0';
968     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
969     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
970     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
971     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
972   }
973 
974   /* DIRICHLET PROBLEM */
975   /* Matrix for Dirichlet problem is pcis->A_II */
976   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
977   if (!pcbddc->ksp_D) { /* create object if not yet build */
978     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
979     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
980     /* default */
981     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
982     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
983     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
984     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
985     if (issbaij) {
986       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
987     } else {
988       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
989     }
990     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
991   }
992   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
993   /* Allow user's customization */
994   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
995   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
996   if (!n_D) {
997     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
998     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
999   }
1000   /* Set Up KSP for Dirichlet problem of BDDC */
1001   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1002   /* set ksp_D into pcis data */
1003   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1004   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1005   pcis->ksp_D = pcbddc->ksp_D;
1006 
1007   /* NEUMANN PROBLEM */
1008   /* Matrix for Neumann problem is A_RR -> we need to create it */
1009   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1010   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1011   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1012   if (ibs != mbs) {
1013     Mat newmat;
1014     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1015     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1016     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1017   } else {
1018     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1019   }
1020   if (!pcbddc->ksp_R) { /* create object if not yet build */
1021     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1022     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1023     /* default */
1024     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1025     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1026     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1027     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1028     if (issbaij) {
1029       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1030     } else {
1031       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1032     }
1033     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1034   }
1035   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1036   /* Allow user's customization */
1037   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1038   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1039   if (!n_R) {
1040     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1041     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1042   }
1043   /* Set Up KSP for Neumann problem of BDDC */
1044   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1045 
1046   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1047 
1048   /* Dirichlet */
1049   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1050   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1051   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1052   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1053   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1054   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1055   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1056   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1057   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1058   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1059   /* need to be adapted? */
1060   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1061   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1062   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1063   /* print info */
1064   if (pcbddc->dbg_flag) {
1065     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1066     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1067     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1068     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1069     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1070   }
1071   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1072     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1073   }
1074 
1075   /* Neumann */
1076   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1077   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1078   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1079   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1080   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1081   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1082   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1083   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1084   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1085   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1086   /* need to be adapted? */
1087   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1088   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1089   /* print info */
1090   if (pcbddc->dbg_flag) {
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1092     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1093   }
1094   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1095     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1096   }
1097   /* free Neumann problem's matrix */
1098   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1104 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1105 {
1106   PetscErrorCode ierr;
1107   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1108 
1109   PetscFunctionBegin;
1110   if (applytranspose) {
1111     if (pcbddc->local_auxmat1) {
1112       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1113       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1114     }
1115     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1116   } else {
1117     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1118     if (pcbddc->local_auxmat1) {
1119       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1120       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1121     }
1122   }
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1127 #undef __FUNCT__
1128 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1129 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1130 {
1131   PetscErrorCode ierr;
1132   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1133   PC_IS*            pcis = (PC_IS*)  (pc->data);
1134   const PetscScalar zero = 0.0;
1135 
1136   PetscFunctionBegin;
1137   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1138   if (applytranspose) {
1139     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1140     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1141   } else {
1142     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1143     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1144   }
1145   /* Scatter data of coarse_rhs */
1146   ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr);
1147   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148 
1149   /* Local solution on R nodes */
1150   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1151   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   if (pcbddc->switch_static) {
1154     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156   }
1157   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1158   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1159   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161   if (pcbddc->switch_static) {
1162     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1163     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1164   }
1165 
1166   /* Coarse solution */
1167   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1168   /* TODO remove null space when doing multilevel */
1169   ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1170   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1171   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172 
1173   /* Sum contributions from two levels */
1174   if (applytranspose) {
1175     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1176     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1177   } else {
1178     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1179     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1180   }
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1186 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1187 {
1188   PetscErrorCode ierr;
1189   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1190 
1191   PetscFunctionBegin;
1192   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1198 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1199 {
1200   PetscErrorCode ierr;
1201   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1202 
1203   PetscFunctionBegin;
1204   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 /* uncomment for testing purposes */
1209 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1210 #undef __FUNCT__
1211 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1212 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1213 {
1214   PetscErrorCode    ierr;
1215   PC_IS*            pcis = (PC_IS*)(pc->data);
1216   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1217   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1218   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1219   MatType           impMatType=MATSEQAIJ;
1220   /* one and zero */
1221   PetscScalar       one=1.0,zero=0.0;
1222   /* space to store constraints and their local indices */
1223   PetscScalar       *temp_quadrature_constraint;
1224   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1225   /* iterators */
1226   PetscInt          i,j,k,total_counts,temp_start_ptr;
1227   /* stuff to store connected components stored in pcbddc->mat_graph */
1228   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1229   PetscInt          n_ISForFaces,n_ISForEdges;
1230   /* near null space stuff */
1231   MatNullSpace      nearnullsp;
1232   const Vec         *nearnullvecs;
1233   Vec               *localnearnullsp;
1234   PetscBool         nnsp_has_cnst;
1235   PetscInt          nnsp_size;
1236   PetscScalar       *array;
1237   /* BLAS integers */
1238   PetscBLASInt      lwork,lierr;
1239   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1240   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1241   /* LAPACK working arrays for SVD or POD */
1242   PetscBool         skip_lapack;
1243   PetscScalar       *work;
1244   PetscReal         *singular_vals;
1245 #if defined(PETSC_USE_COMPLEX)
1246   PetscReal         *rwork;
1247 #endif
1248 #if defined(PETSC_MISSING_LAPACK_GESVD)
1249   PetscBLASInt      Blas_one_2=1;
1250   PetscScalar       *temp_basis,*correlation_mat;
1251 #else
1252   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1253   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1254 #endif
1255   /* change of basis */
1256   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1257   PetscBool         boolforchange,qr_needed;
1258   PetscBT           touched,change_basis,qr_needed_idx;
1259   /* auxiliary stuff */
1260   PetscInt          *nnz,*is_indices,*local_to_B;
1261   /* some quantities */
1262   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1263   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1264 
1265 
1266   PetscFunctionBegin;
1267   /* Get index sets for faces, edges and vertices from graph */
1268   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1269     pcbddc->use_vertices = PETSC_TRUE;
1270   }
1271   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1272   /* HACK: provide functions to set change of basis */
1273   if (!ISForVertices && pcbddc->NullSpace) {
1274     pcbddc->use_change_of_basis = PETSC_TRUE;
1275     pcbddc->use_change_on_faces = PETSC_FALSE;
1276   }
1277   /* print some info */
1278   if (pcbddc->dbg_flag) {
1279     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1280     i = 0;
1281     if (ISForVertices) {
1282       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1283     }
1284     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1287     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1288   }
1289   /* check if near null space is attached to global mat */
1290   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1291   if (nearnullsp) {
1292     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1293   } else { /* if near null space is not provided BDDC uses constants by default */
1294     nnsp_size = 0;
1295     nnsp_has_cnst = PETSC_TRUE;
1296   }
1297   /* get max number of constraints on a single cc */
1298   max_constraints = nnsp_size;
1299   if (nnsp_has_cnst) max_constraints++;
1300 
1301   /*
1302        Evaluate maximum storage size needed by the procedure
1303        - temp_indices will contain start index of each constraint stored as follows
1304        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1305        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1306        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1307                                                                                                                                                          */
1308   total_counts = n_ISForFaces+n_ISForEdges;
1309   total_counts *= max_constraints;
1310   n_vertices = 0;
1311   if (ISForVertices) {
1312     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1313   }
1314   total_counts += n_vertices;
1315   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1316   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1317   total_counts = 0;
1318   max_size_of_constraint = 0;
1319   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1320     if (i<n_ISForEdges) {
1321       used_IS = &ISForEdges[i];
1322     } else {
1323       used_IS = &ISForFaces[i-n_ISForEdges];
1324     }
1325     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1326     total_counts += j;
1327     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1328   }
1329   total_counts *= max_constraints;
1330   total_counts += n_vertices;
1331   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1332   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1333   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1334   /* local to boundary numbering */
1335   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1336   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1337   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1338   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1339   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1340   /* get local part of global near null space vectors */
1341   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1342   for (k=0;k<nnsp_size;k++) {
1343     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1344     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1345     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1346   }
1347 
1348   /* whether or not to skip lapack calls */
1349   skip_lapack = PETSC_TRUE;
1350   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1351 
1352   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1353   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1354     PetscScalar temp_work;
1355 #if defined(PETSC_MISSING_LAPACK_GESVD)
1356     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1357     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1358     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1359     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1360 #if defined(PETSC_USE_COMPLEX)
1361     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1362 #endif
1363     /* now we evaluate the optimal workspace using query with lwork=-1 */
1364     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1365     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1366     lwork = -1;
1367     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1368 #if !defined(PETSC_USE_COMPLEX)
1369     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1370 #else
1371     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1372 #endif
1373     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1374     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1375 #else /* on missing GESVD */
1376     /* SVD */
1377     PetscInt max_n,min_n;
1378     max_n = max_size_of_constraint;
1379     min_n = max_constraints;
1380     if (max_size_of_constraint < max_constraints) {
1381       min_n = max_size_of_constraint;
1382       max_n = max_constraints;
1383     }
1384     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1385 #if defined(PETSC_USE_COMPLEX)
1386     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1387 #endif
1388     /* now we evaluate the optimal workspace using query with lwork=-1 */
1389     lwork = -1;
1390     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1391     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1392     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1393     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1394 #if !defined(PETSC_USE_COMPLEX)
1395     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1396 #else
1397     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1398 #endif
1399     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1400     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1401 #endif /* on missing GESVD */
1402     /* Allocate optimal workspace */
1403     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1404     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1405   }
1406   /* Now we can loop on constraining sets */
1407   total_counts = 0;
1408   temp_indices[0] = 0;
1409   /* vertices */
1410   if (ISForVertices) {
1411     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1412     if (nnsp_has_cnst) { /* consider all vertices */
1413       for (i=0;i<n_vertices;i++) {
1414         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1415         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1416         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1417         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1418         total_counts++;
1419       }
1420     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1421       PetscBool used_vertex;
1422       for (i=0;i<n_vertices;i++) {
1423         used_vertex = PETSC_FALSE;
1424         k = 0;
1425         while (!used_vertex && k<nnsp_size) {
1426           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1427           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1428             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1429             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1430             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1431             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1432             total_counts++;
1433             used_vertex = PETSC_TRUE;
1434           }
1435           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1436           k++;
1437         }
1438       }
1439     }
1440     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1441     n_vertices = total_counts;
1442   }
1443 
1444   /* edges and faces */
1445   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1446     if (i<n_ISForEdges) {
1447       used_IS = &ISForEdges[i];
1448       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1449     } else {
1450       used_IS = &ISForFaces[i-n_ISForEdges];
1451       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1452     }
1453     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1454     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1455     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1456     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1457     /* change of basis should not be performed on local periodic nodes */
1458     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1459     if (nnsp_has_cnst) {
1460       PetscScalar quad_value;
1461       temp_constraints++;
1462       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1463       for (j=0;j<size_of_constraint;j++) {
1464         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1465         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1466         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1467       }
1468       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1469       total_counts++;
1470     }
1471     for (k=0;k<nnsp_size;k++) {
1472       PetscReal real_value;
1473       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1474       for (j=0;j<size_of_constraint;j++) {
1475         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1476         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1477         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1478       }
1479       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1480       /* check if array is null on the connected component */
1481       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1482       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1483       if (real_value > 0.0) { /* keep indices and values */
1484         temp_constraints++;
1485         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1486         total_counts++;
1487       }
1488     }
1489     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1490     valid_constraints = temp_constraints;
1491     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1492     if (!pcbddc->use_nnsp_true && temp_constraints) {
1493       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1494 
1495 #if defined(PETSC_MISSING_LAPACK_GESVD)
1496       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1497          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1498          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1499             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1500             from that computed using LAPACKgesvd
1501          -> This is due to a different computation of eigenvectors in LAPACKheev
1502          -> The quality of the POD-computed basis will be the same */
1503       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1504       /* Store upper triangular part of correlation matrix */
1505       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1506       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1507       for (j=0;j<temp_constraints;j++) {
1508         for (k=0;k<j+1;k++) {
1509           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1510         }
1511       }
1512       /* compute eigenvalues and eigenvectors of correlation matrix */
1513       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1514       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1515 #if !defined(PETSC_USE_COMPLEX)
1516       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1517 #else
1518       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1519 #endif
1520       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1521       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1522       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1523       j=0;
1524       while (j < temp_constraints && singular_vals[j] < tol) j++;
1525       total_counts=total_counts-j;
1526       valid_constraints = temp_constraints-j;
1527       /* scale and copy POD basis into used quadrature memory */
1528       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1529       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1530       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1531       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1532       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1533       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1534       if (j<temp_constraints) {
1535         PetscInt ii;
1536         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1537         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1538         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1539         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1540         for (k=0;k<temp_constraints-j;k++) {
1541           for (ii=0;ii<size_of_constraint;ii++) {
1542             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1543           }
1544         }
1545       }
1546 #else  /* on missing GESVD */
1547       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1548       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1549       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1550       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1551 #if !defined(PETSC_USE_COMPLEX)
1552       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1553 #else
1554       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1555 #endif
1556       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1557       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1558       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1559       k = temp_constraints;
1560       if (k > size_of_constraint) k = size_of_constraint;
1561       j = 0;
1562       while (j < k && singular_vals[k-j-1] < tol) j++;
1563       total_counts = total_counts-temp_constraints+k-j;
1564       valid_constraints = k-j;
1565 #endif /* on missing GESVD */
1566     }
1567     /* setting change_of_basis flag is safe now */
1568     if (boolforchange) {
1569       for (j=0;j<valid_constraints;j++) {
1570         PetscBTSet(change_basis,total_counts-j-1);
1571       }
1572     }
1573   }
1574   /* free index sets of faces, edges and vertices */
1575   for (i=0;i<n_ISForFaces;i++) {
1576     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1577   }
1578   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1579   for (i=0;i<n_ISForEdges;i++) {
1580     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1581   }
1582   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1583   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1584 
1585   /* free workspace */
1586   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1587     ierr = PetscFree(work);CHKERRQ(ierr);
1588 #if defined(PETSC_USE_COMPLEX)
1589     ierr = PetscFree(rwork);CHKERRQ(ierr);
1590 #endif
1591     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1592 #if defined(PETSC_MISSING_LAPACK_GESVD)
1593     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1594     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1595 #endif
1596   }
1597   for (k=0;k<nnsp_size;k++) {
1598     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1599   }
1600   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1601 
1602   /* set quantities in pcbddc data structure */
1603   /* n_vertices defines the number of subdomain corners in the primal space */
1604   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1605   pcbddc->local_primal_size = total_counts;
1606   pcbddc->n_vertices = n_vertices;
1607   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1608 
1609   /* Create constraint matrix */
1610   /* The constraint matrix is used to compute the l2g map of primal dofs */
1611   /* so we need to set it up properly either with or without change of basis */
1612   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1613   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1614   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1615   /* array to compute a local numbering of constraints : vertices first then constraints */
1616   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1617   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1618   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1619   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1620   /* auxiliary stuff for basis change */
1621   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1622   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1623 
1624   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1625   total_primal_vertices=0;
1626   for (i=0;i<pcbddc->local_primal_size;i++) {
1627     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1628     if (size_of_constraint == 1) {
1629       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1630       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1631       aux_primal_minloc[total_primal_vertices]=0;
1632       total_primal_vertices++;
1633     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1634       PetscInt min_loc,min_index;
1635       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1636       /* find first untouched local node */
1637       k = 0;
1638       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1639       min_index = global_indices[k];
1640       min_loc = k;
1641       /* search the minimum among global nodes already untouched on the cc */
1642       for (k=1;k<size_of_constraint;k++) {
1643         /* there can be more than one constraint on a single connected component */
1644         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1645           min_index = global_indices[k];
1646           min_loc = k;
1647         }
1648       }
1649       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1650       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1651       aux_primal_minloc[total_primal_vertices]=min_loc;
1652       total_primal_vertices++;
1653     }
1654   }
1655   /* determine if a QR strategy is needed for change of basis */
1656   qr_needed = PETSC_FALSE;
1657   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1658   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1659     if (PetscBTLookup(change_basis,i)) {
1660       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1661       j = 0;
1662       for (k=0;k<size_of_constraint;k++) {
1663         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1664           j++;
1665         }
1666       }
1667       /* found more than one primal dof on the cc */
1668       if (j > 1) {
1669         PetscBTSet(qr_needed_idx,i);
1670         qr_needed = PETSC_TRUE;
1671       }
1672     }
1673   }
1674   /* free workspace */
1675   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1676 
1677   /* permute indices in order to have a sorted set of vertices */
1678   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1679 
1680   /* nonzero structure of constraint matrix */
1681   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1682   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1683   j=total_primal_vertices;
1684   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1685     if (!PetscBTLookup(change_basis,i)) {
1686       nnz[j]=temp_indices[i+1]-temp_indices[i];
1687       j++;
1688     }
1689   }
1690   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1691   ierr = PetscFree(nnz);CHKERRQ(ierr);
1692   /* set values in constraint matrix */
1693   for (i=0;i<total_primal_vertices;i++) {
1694     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1695   }
1696   total_counts = total_primal_vertices;
1697   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1698     if (!PetscBTLookup(change_basis,i)) {
1699       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1700       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1701       total_counts++;
1702     }
1703   }
1704   /* assembling */
1705   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1706   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   /*
1708   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1709   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1710   */
1711   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1712   if (pcbddc->use_change_of_basis) {
1713     /* dual and primal dofs on a single cc */
1714     PetscInt     dual_dofs,primal_dofs;
1715     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1716     PetscInt     primal_counter;
1717     /* working stuff for GEQRF */
1718     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1719     PetscBLASInt lqr_work;
1720     /* working stuff for UNGQR */
1721     PetscScalar  *gqr_work,lgqr_work_t;
1722     PetscBLASInt lgqr_work;
1723     /* working stuff for TRTRS */
1724     PetscScalar  *trs_rhs;
1725     PetscBLASInt Blas_NRHS;
1726     /* pointers for values insertion into change of basis matrix */
1727     PetscInt     *start_rows,*start_cols;
1728     PetscScalar  *start_vals;
1729     /* working stuff for values insertion */
1730     PetscBT      is_primal;
1731 
1732     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1733     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1734     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1735     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1736     /* work arrays */
1737     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1738     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1739     /* nonzeros per row */
1740     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1741       if (PetscBTLookup(change_basis,i)) {
1742         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1743         if (PetscBTLookup(qr_needed_idx,i)) {
1744           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1745         } else {
1746           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1747           /* get local primal index on the cc */
1748           j = 0;
1749           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1750           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1751         }
1752       }
1753     }
1754     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1755     ierr = PetscFree(nnz);CHKERRQ(ierr);
1756     /* Set initial identity in the matrix */
1757     for (i=0;i<pcis->n_B;i++) {
1758       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1759     }
1760 
1761     if (pcbddc->dbg_flag) {
1762       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1763       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1764     }
1765 
1766 
1767     /* Now we loop on the constraints which need a change of basis */
1768     /*
1769        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1770        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1771 
1772        Basic blocks of change of basis matrix T computed
1773 
1774           - Using the following block transformation if there is only a primal dof on the cc
1775             (in the example, primal dof is the last one of the edge in LOCAL ordering
1776              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1777             | 1        0   ...        0     1 |
1778             | 0        1   ...        0     1 |
1779             |              ...                |
1780             | 0        ...            1     1 |
1781             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1782 
1783           - via QR decomposition of constraints otherwise
1784     */
1785     if (qr_needed) {
1786       /* space to store Q */
1787       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1788       /* first we issue queries for optimal work */
1789       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1790       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1791       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1792       lqr_work = -1;
1793       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1794       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1795       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1796       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1797       lgqr_work = -1;
1798       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1799       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1800       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1801       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1802       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1803       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1804       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1805       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1806       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1807       /* array to store scaling factors for reflectors */
1808       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1809       /* array to store rhs and solution of triangular solver */
1810       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1811       /* allocating workspace for check */
1812       if (pcbddc->dbg_flag) {
1813         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1814       }
1815     }
1816     /* array to store whether a node is primal or not */
1817     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1818     for (i=0;i<total_primal_vertices;i++) {
1819       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1820     }
1821 
1822     /* loop on constraints and see whether or not they need a change of basis and compute it */
1823     /* -> using implicit ordering contained in temp_indices data */
1824     total_counts = pcbddc->n_vertices;
1825     primal_counter = total_counts;
1826     while (total_counts<pcbddc->local_primal_size) {
1827       primal_dofs = 1;
1828       if (PetscBTLookup(change_basis,total_counts)) {
1829         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1830         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1831           primal_dofs++;
1832         }
1833         /* get constraint info */
1834         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1835         dual_dofs = size_of_constraint-primal_dofs;
1836 
1837         if (pcbddc->dbg_flag) {
1838           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1839         }
1840 
1841         if (primal_dofs > 1) { /* QR */
1842 
1843           /* copy quadrature constraints for change of basis check */
1844           if (pcbddc->dbg_flag) {
1845             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1846           }
1847           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1848           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1849 
1850           /* compute QR decomposition of constraints */
1851           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1852           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1853           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1854           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1855           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1856           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1857           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1858 
1859           /* explictly compute R^-T */
1860           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1861           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1862           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1863           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1864           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1865           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1866           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1867           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1868           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1869           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1870 
1871           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1872           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1873           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1874           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1875           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1876           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1877           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1878           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1879           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1880 
1881           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1882              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1883              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1884           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1885           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1886           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1887           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1888           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1889           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1890           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1891           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1892           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1893           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1894 
1895           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1896           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1897           /* insert cols for primal dofs */
1898           for (j=0;j<primal_dofs;j++) {
1899             start_vals = &qr_basis[j*size_of_constraint];
1900             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1901             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1902           }
1903           /* insert cols for dual dofs */
1904           for (j=0,k=0;j<dual_dofs;k++) {
1905             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1906               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1907               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1908               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1909               j++;
1910             }
1911           }
1912 
1913           /* check change of basis */
1914           if (pcbddc->dbg_flag) {
1915             PetscInt   ii,jj;
1916             PetscBool valid_qr=PETSC_TRUE;
1917             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1918             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1919             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1920             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1921             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1922             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1923             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1924             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1925             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1926             for (jj=0;jj<size_of_constraint;jj++) {
1927               for (ii=0;ii<primal_dofs;ii++) {
1928                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1929                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1930               }
1931             }
1932             if (!valid_qr) {
1933               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1934               for (jj=0;jj<size_of_constraint;jj++) {
1935                 for (ii=0;ii<primal_dofs;ii++) {
1936                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1937                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1938                   }
1939                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1940                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1941                   }
1942                 }
1943               }
1944             } else {
1945               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1946             }
1947           }
1948         } else { /* simple transformation block */
1949           PetscInt row,col;
1950           PetscScalar val;
1951           for (j=0;j<size_of_constraint;j++) {
1952             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1953             if (!PetscBTLookup(is_primal,row)) {
1954               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1955               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1956               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1957             } else {
1958               for (k=0;k<size_of_constraint;k++) {
1959                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1960                 if (row != col) {
1961                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1962                 } else {
1963                   val = 1.0;
1964                 }
1965                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1966               }
1967             }
1968           }
1969         }
1970         /* increment primal counter */
1971         primal_counter += primal_dofs;
1972       } else {
1973         if (pcbddc->dbg_flag) {
1974           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1975         }
1976       }
1977       /* increment constraint counter total_counts */
1978       total_counts += primal_dofs;
1979     }
1980 
1981     /* free workspace */
1982     if (qr_needed) {
1983       if (pcbddc->dbg_flag) {
1984         ierr = PetscFree(work);CHKERRQ(ierr);
1985       }
1986       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1987       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1988       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1989       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1990       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1991     }
1992     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1993     /* assembling */
1994     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996     /*
1997     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1998     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1999     */
2000   }
2001 
2002   /* flush dbg viewer */
2003   if (pcbddc->dbg_flag) {
2004     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2005   }
2006 
2007   /* free workspace */
2008   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2009   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2010   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2011   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2012   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2013   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2014   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2015   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2016   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2017   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2023 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2024 {
2025   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2026   PC_IS       *pcis = (PC_IS*)pc->data;
2027   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2028   PetscInt    bs,ierr,i,vertex_size;
2029   PetscViewer viewer=pcbddc->dbg_viewer;
2030 
2031   PetscFunctionBegin;
2032   /* Init local Graph struct */
2033   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2034 
2035   /* Check validity of the csr graph passed in by the user */
2036   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2037     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2038   }
2039 
2040   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2041   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2042     Mat mat_adj;
2043     const PetscInt *xadj,*adjncy;
2044     PetscBool flg_row=PETSC_TRUE;
2045 
2046     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2047     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2048     if (!flg_row) {
2049       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2050     }
2051     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2052     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2053     if (!flg_row) {
2054       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2055     }
2056     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2057   }
2058 
2059   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2060   vertex_size = 1;
2061   if (!pcbddc->user_provided_isfordofs) {
2062     if (!pcbddc->n_ISForDofs) {
2063       IS *custom_ISForDofs;
2064 
2065       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2066       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2067       for (i=0;i<bs;i++) {
2068         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2069       }
2070       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2071       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2072       /* remove my references to IS objects */
2073       for (i=0;i<bs;i++) {
2074         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2075       }
2076       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2077     }
2078   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2079     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2080   }
2081 
2082   /* Setup of Graph */
2083   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2084 
2085   /* Graph's connected components analysis */
2086   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2087 
2088   /* print some info to stdout */
2089   if (pcbddc->dbg_flag) {
2090     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2097 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2098 {
2099   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2100   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2101   PetscErrorCode ierr;
2102 
2103   PetscFunctionBegin;
2104   n = 0;
2105   vertices = 0;
2106   if (pcbddc->ConstraintMatrix) {
2107     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2108     for (i=0;i<local_primal_size;i++) {
2109       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2110       if (size_of_constraint == 1) n++;
2111       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2112     }
2113     if (vertices_idx) {
2114       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2115       n = 0;
2116       for (i=0;i<local_primal_size;i++) {
2117         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2118         if (size_of_constraint == 1) {
2119           vertices[n++]=row_cmat_indices[0];
2120         }
2121         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2122       }
2123     }
2124   }
2125   *n_vertices = n;
2126   if (vertices_idx) *vertices_idx = vertices;
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2132 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2133 {
2134   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2135   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2136   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2137   PetscBT        touched;
2138   PetscErrorCode ierr;
2139 
2140     /* This function assumes that the number of local constraints per connected component
2141        is not greater than the number of nodes defined for the connected component
2142        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2143   PetscFunctionBegin;
2144   n = 0;
2145   constraints_index = 0;
2146   if (pcbddc->ConstraintMatrix) {
2147     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2148     max_size_of_constraint = 0;
2149     for (i=0;i<local_primal_size;i++) {
2150       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2151       if (size_of_constraint > 1) {
2152         n++;
2153       }
2154       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2155       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2156     }
2157     if (constraints_idx) {
2158       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2159       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2160       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2161       n = 0;
2162       for (i=0;i<local_primal_size;i++) {
2163         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2164         if (size_of_constraint > 1) {
2165           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2166           /* find first untouched local node */
2167           j = 0;
2168           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2169           min_index = row_cmat_global_indices[j];
2170           min_loc = j;
2171           /* search the minimum among nodes not yet touched on the connected component
2172              since there can be more than one constraint on a single cc */
2173           for (j=1;j<size_of_constraint;j++) {
2174             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2175               min_index = row_cmat_global_indices[j];
2176               min_loc = j;
2177             }
2178           }
2179           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2180           constraints_index[n++] = row_cmat_indices[min_loc];
2181         }
2182         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2183       }
2184       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2185       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2186     }
2187   }
2188   *n_constraints = n;
2189   if (constraints_idx) *constraints_idx = constraints_index;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /* the next two functions has been adapted from pcis.c */
2194 #undef __FUNCT__
2195 #define __FUNCT__ "PCBDDCApplySchur"
2196 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2197 {
2198   PetscErrorCode ierr;
2199   PC_IS          *pcis = (PC_IS*)(pc->data);
2200 
2201   PetscFunctionBegin;
2202   if (!vec2_B) { vec2_B = v; }
2203   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2204   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2205   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2206   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2207   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2213 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2214 {
2215   PetscErrorCode ierr;
2216   PC_IS          *pcis = (PC_IS*)(pc->data);
2217 
2218   PetscFunctionBegin;
2219   if (!vec2_B) { vec2_B = v; }
2220   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2221   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2222   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2223   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2224   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "PCBDDCSubsetNumbering"
2230 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2231 {
2232   Vec            local_vec,global_vec;
2233   IS             seqis,paris;
2234   VecScatter     scatter_ctx;
2235   PetscScalar    *array;
2236   PetscInt       *temp_global_dofs;
2237   PetscScalar    globalsum;
2238   PetscInt       i,j,s;
2239   PetscInt       nlocals,first_index,old_index,max_local;
2240   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2241   PetscMPIInt    *dof_sizes,*dof_displs;
2242   PetscBool      first_found;
2243   PetscErrorCode ierr;
2244 
2245   PetscFunctionBegin;
2246   /* mpi buffers */
2247   MPI_Comm_size(comm,&size_prec_comm);
2248   MPI_Comm_rank(comm,&rank_prec_comm);
2249   j = ( !rank_prec_comm ? size_prec_comm : 0);
2250   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2251   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2252   /* get maximum size of subset */
2253   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2254   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2255   max_local = 0;
2256   if (n_local_dofs) {
2257     max_local = temp_global_dofs[0];
2258     for (i=1;i<n_local_dofs;i++) {
2259       if (max_local < temp_global_dofs[i] ) {
2260         max_local = temp_global_dofs[i];
2261       }
2262     }
2263   }
2264   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2265   max_global++;
2266   max_local = 0;
2267   if (n_local_dofs) {
2268     max_local = local_dofs[0];
2269     for (i=1;i<n_local_dofs;i++) {
2270       if (max_local < local_dofs[i] ) {
2271         max_local = local_dofs[i];
2272       }
2273     }
2274   }
2275   max_local++;
2276   /* allocate workspace */
2277   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2278   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2279   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2280   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2281   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2282   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2283   /* create scatter */
2284   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2285   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2286   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2287   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2288   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2289   /* init array */
2290   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2291   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2292   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2293   if (local_dofs_mult) {
2294     for (i=0;i<n_local_dofs;i++) {
2295       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2296     }
2297   } else {
2298     for (i=0;i<n_local_dofs;i++) {
2299       array[local_dofs[i]]=1.0;
2300     }
2301   }
2302   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2303   /* scatter into global vec and get total number of global dofs */
2304   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2305   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2306   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2307   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2308   /* Fill global_vec with cumulative function for global numbering */
2309   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2310   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2311   nlocals = 0;
2312   first_index = -1;
2313   first_found = PETSC_FALSE;
2314   for (i=0;i<s;i++) {
2315     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2316       first_found = PETSC_TRUE;
2317       first_index = i;
2318     }
2319     nlocals += (PetscInt)PetscRealPart(array[i]);
2320   }
2321   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2322   if (!rank_prec_comm) {
2323     dof_displs[0]=0;
2324     for (i=1;i<size_prec_comm;i++) {
2325       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2326     }
2327   }
2328   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2329   if (first_found) {
2330     array[first_index] += (PetscScalar)nlocals;
2331     old_index = first_index;
2332     for (i=first_index+1;i<s;i++) {
2333       if (PetscRealPart(array[i]) > 0.0) {
2334         array[i] += array[old_index];
2335         old_index = i;
2336       }
2337     }
2338   }
2339   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2340   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2341   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2342   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2343   /* get global ordering of local dofs */
2344   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2345   if (local_dofs_mult) {
2346     for (i=0;i<n_local_dofs;i++) {
2347       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2348     }
2349   } else {
2350     for (i=0;i<n_local_dofs;i++) {
2351       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2352     }
2353   }
2354   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2355   /* free workspace */
2356   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2357   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2358   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2359   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2360   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2361   /* return pointer to global ordering of local dofs */
2362   *global_numbering_subset = temp_global_dofs;
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2368 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2369 {
2370   PetscInt       i,j;
2371   PetscScalar    *alphas;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   /* this implements stabilized Gram-Schmidt */
2376   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2377   for (i=0;i<n;i++) {
2378     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2379     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2380     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2381   }
2382   ierr = PetscFree(alphas);CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatConvert_IS_AIJ"
2389 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2390 {
2391   Mat new_mat;
2392   Mat_IS *matis = (Mat_IS*)(mat->data);
2393   /* info on mat */
2394   PetscInt bs,rows,cols;
2395   PetscInt lrows,lcols;
2396   PetscInt local_rows,local_cols;
2397   PetscMPIInt nsubdomains;
2398   /* preallocation */
2399   Vec vec_dnz,vec_onz;
2400   PetscScalar *my_dnz,*my_onz,*array;
2401   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2402   PetscInt  index_row,index_col,owner;
2403   PetscInt  *local_indices,*global_indices;
2404   /* work */
2405   PetscInt i,j;
2406   PetscErrorCode ierr;
2407 
2408   PetscFunctionBegin;
2409   /* CHECKS */
2410   /* get info from mat */
2411   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2412   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2413   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2414   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2415 
2416   /* MAT_INITIAL_MATRIX */
2417   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2418   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2419   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2420   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2421   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2422   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2423 
2424   /*
2425     preallocation
2426   */
2427 
2428   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2429   /*
2430      Some vectors are needed to sum up properly on shared interface dofs.
2431      Note that preallocation is not exact, since it overestimates nonzeros
2432   */
2433 /*
2434   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2435   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2436   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2437   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2438 */
2439   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2440   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2441   /* All processes need to compute entire row ownership */
2442   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2443   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2444   for (i=0;i<nsubdomains;i++) {
2445     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2446       row_ownership[j]=i;
2447     }
2448   }
2449   /* map indices of local mat to global values */
2450   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2451   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2452   for (i=0;i<local_rows;i++) local_indices[i]=i;
2453   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2454   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2455 
2456   /*
2457      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2458      then, they will be summed up properly. This way, preallocation is always sufficient
2459   */
2460   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2461   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2462   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2463   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2464   for (i=0;i<local_rows;i++) {
2465     index_row = global_indices[i];
2466     for (j=i;j<local_rows;j++) {
2467       owner = row_ownership[index_row];
2468       index_col = global_indices[j];
2469       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2470         my_dnz[i] += 1.0;
2471       } else { /* offdiag block */
2472         my_onz[i] += 1.0;
2473       }
2474       /* same as before, interchanging rows and cols */
2475       if (i != j) {
2476         owner = row_ownership[index_col];
2477         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2478           my_dnz[j] += 1.0;
2479         } else {
2480           my_onz[j] += 1.0;
2481         }
2482       }
2483     }
2484   }
2485   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2486   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2487   if (local_rows) { /* multilevel guard */
2488     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2489     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2490   }
2491   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2492   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2493   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2494   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2495   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2496   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2497   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2498 
2499   /* set computed preallocation in dnz and onz */
2500   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2501   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2502   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2503   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2504   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2505   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2506   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2507   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2508 
2509   /* Resize preallocation if overestimated */
2510   for (i=0;i<lrows;i++) {
2511     dnz[i] = PetscMin(dnz[i],lcols);
2512     onz[i] = PetscMin(onz[i],cols-lcols);
2513   }
2514   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2515   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2516 
2517   /*
2518     Set values. Very Basic.
2519   */
2520   for (i=0;i<local_rows;i++) {
2521     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2522     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2523     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2524     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2525     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2526   }
2527   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2528   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2529   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2530 
2531   /* get back mat */
2532   *M = new_mat;
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatISSubassemble_Private"
2538 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2539 {
2540   Mat             subdomain_adj;
2541   IS              new_ranks,ranks_send_to;
2542   MatPartitioning partitioner;
2543   Mat_IS          *matis;
2544   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2545   PetscInt        prank;
2546   PetscMPIInt     size,rank,color;
2547   PetscInt        *xadj,*adjncy,*oldranks;
2548   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2549   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2550   PetscErrorCode  ierr;
2551   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2552   PetscSubcomm    subcomm;
2553 
2554   PetscFunctionBegin;
2555   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2556   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2557   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2558 
2559   /* Get info on mapping */
2560   matis = (Mat_IS*)(mat->data);
2561   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2562   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2563 
2564   /* build local CSR graph of subdomains' connectivity */
2565   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2566   xadj[0] = 0;
2567   xadj[1] = PetscMax(n_neighs-1,0);
2568   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2569   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2570 
2571   if (threshold) {
2572     PetscInt* count,min_threshold;
2573     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2574     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2575     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2576       for (j=0;j<n_shared[i];j++) {
2577         count[shared[i][j]] += 1;
2578       }
2579     }
2580     /* adapt threshold since we dont want to lose significant connections */
2581     min_threshold = n_neighs;
2582     for (i=1;i<n_neighs;i++) {
2583       for (j=0;j<n_shared[i];j++) {
2584         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2585       }
2586     }
2587     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2588     threshold = PetscMax(min_threshold+1,threshold);
2589     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2590     xadj[1] = 0;
2591     for (i=1;i<n_neighs;i++) {
2592       for (j=0;j<n_shared[i];j++) {
2593         if (count[shared[i][j]] < threshold) {
2594           adjncy[xadj[1]] = neighs[i];
2595           adjncy_wgt[xadj[1]] = n_shared[i];
2596           xadj[1]++;
2597           break;
2598         }
2599       }
2600     }
2601     ierr = PetscFree(count);CHKERRQ(ierr);
2602   } else {
2603     if (xadj[1]) {
2604       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2605       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2606     }
2607   }
2608   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2609   if (use_square) {
2610     for (i=0;i<xadj[1];i++) {
2611       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2612     }
2613   }
2614   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2615 
2616   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2617 
2618   /*
2619     Restrict work on active processes only.
2620   */
2621   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2622   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2623   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2624   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2625   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2626   if (color) {
2627     ierr = PetscFree(xadj);CHKERRQ(ierr);
2628     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2629     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2630   } else {
2631     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2632     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2633     prank = rank;
2634     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2635     for (i=0;i<size;i++) {
2636       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2637     }
2638     for (i=0;i<xadj[1];i++) {
2639       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2640     }
2641     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2642     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2643     n_subdomains = (PetscInt)size;
2644     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2645 
2646     /* Partition */
2647     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2648     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2649     if (use_vwgt) {
2650       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2651       v_wgt[0] = local_size;
2652       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2653     }
2654     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2655     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2656     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2657     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2658     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2659 
2660     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2661     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2662     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2663     /* clean up */
2664     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2665     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2666     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2667     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2668   }
2669   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2670 
2671   /* assemble parallel IS for sends */
2672   i = 1;
2673   if (color) i=0;
2674   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2675   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2676 
2677   /* get back IS */
2678   *is_sends = ranks_send_to;
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2683 
2684 #undef __FUNCT__
2685 #define __FUNCT__ "MatISSubassemble"
2686 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2687 {
2688   Mat                    local_mat,new_mat;
2689   Mat_IS                 *matis;
2690   IS                     is_sends_internal;
2691   PetscInt               rows,cols;
2692   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2693   PetscBool              ismatis,isdense;
2694   ISLocalToGlobalMapping l2gmap;
2695   PetscInt*              l2gmap_indices;
2696   const PetscInt*        is_indices;
2697   MatType                new_local_type;
2698   MatTypePrivate         new_local_type_private;
2699   /* buffers */
2700   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2701   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2702   /* MPI */
2703   MPI_Comm               comm;
2704   PetscMPIInt            n_sends,n_recvs,commsize;
2705   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2706   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2707   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2708   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2709   PetscErrorCode         ierr;
2710 
2711   PetscFunctionBegin;
2712   /* checks */
2713   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2714   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2715   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2716   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2717   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2718   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2719   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2720   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2721   PetscValidLogicalCollectiveInt(mat,bs,0);
2722   /* prepare IS for sending if not provided */
2723   if (!is_sends) {
2724     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2725     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2726   } else {
2727     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2728     is_sends_internal = is_sends;
2729   }
2730 
2731   /* get pointer of MATIS data */
2732   matis = (Mat_IS*)mat->data;
2733 
2734   /* get comm */
2735   comm = PetscObjectComm((PetscObject)mat);
2736 
2737   /* compute number of sends */
2738   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2739   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2740 
2741   /* compute number of receives */
2742   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2743   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2744   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2745   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2746   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2747   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2748   ierr = PetscFree(iflags);CHKERRQ(ierr);
2749 
2750   /* prepare send/receive buffers */
2751   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2752   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2753   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2754   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2755 
2756   /* Get data from local mat */
2757   if (!isdense) {
2758     /* TODO: See below some guidelines on how to prepare the local buffers */
2759     /*
2760        send_buffer_vals should contain the raw values of the local matrix
2761        send_buffer_idxs should contain:
2762        - MatType_PRIVATE type
2763        - PetscInt        size_of_l2gmap
2764        - PetscInt        global_row_indices[size_of_l2gmap]
2765        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2766     */
2767   } else {
2768     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2769     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2770     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2771     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2772     send_buffer_idxs[1] = i;
2773     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2774     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2775     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2776     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2777     for (i=0;i<n_sends;i++) {
2778       ilengths_vals[is_indices[i]] = len*len;
2779       ilengths_idxs[is_indices[i]] = len+2;
2780     }
2781   }
2782   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2783   buf_size_idxs = 0;
2784   buf_size_vals = 0;
2785   for (i=0;i<n_recvs;i++) {
2786     buf_size_idxs += (PetscInt)olengths_idxs[i];
2787     buf_size_vals += (PetscInt)olengths_vals[i];
2788   }
2789   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2790   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2791 
2792   /* get new tags for clean communications */
2793   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2794   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2795 
2796   /* allocate for requests */
2797   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2798   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2799   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2800   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2801 
2802   /* communications */
2803   ptr_idxs = recv_buffer_idxs;
2804   ptr_vals = recv_buffer_vals;
2805   for (i=0;i<n_recvs;i++) {
2806     source_dest = onodes[i];
2807     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2808     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2809     ptr_idxs += olengths_idxs[i];
2810     ptr_vals += olengths_vals[i];
2811   }
2812   for (i=0;i<n_sends;i++) {
2813     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2814     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2815     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2816   }
2817   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2818   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2819 
2820   /* assemble new l2g map */
2821   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2822   ptr_idxs = recv_buffer_idxs;
2823   buf_size_idxs = 0;
2824   for (i=0;i<n_recvs;i++) {
2825     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2826     ptr_idxs += olengths_idxs[i];
2827   }
2828   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2829   ptr_idxs = recv_buffer_idxs;
2830   buf_size_idxs = 0;
2831   for (i=0;i<n_recvs;i++) {
2832     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2833     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2834     ptr_idxs += olengths_idxs[i];
2835   }
2836   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2837   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2838   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2839 
2840   /* infer new local matrix type from received local matrices type */
2841   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2842   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2843   new_local_type_private = MATAIJ_PRIVATE;
2844   new_local_type = MATSEQAIJ;
2845   if (n_recvs) {
2846     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2847     ptr_idxs = recv_buffer_idxs;
2848     for (i=0;i<n_recvs;i++) {
2849       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2850         new_local_type_private = MATAIJ_PRIVATE;
2851         break;
2852       }
2853       ptr_idxs += olengths_idxs[i];
2854     }
2855     switch (new_local_type_private) {
2856       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2857         new_local_type = MATSEQAIJ;
2858         bs = 1;
2859         break;
2860       case MATAIJ_PRIVATE:
2861         new_local_type = MATSEQAIJ;
2862         bs = 1;
2863         break;
2864       case MATBAIJ_PRIVATE:
2865         new_local_type = MATSEQBAIJ;
2866         break;
2867       case MATSBAIJ_PRIVATE:
2868         new_local_type = MATSEQSBAIJ;
2869         break;
2870       default:
2871         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2872         break;
2873     }
2874   }
2875 
2876   /* create MATIS object */
2877   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2878   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2879   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2880   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2881   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2882   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2883 
2884   /* set values */
2885   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2886   ptr_vals = recv_buffer_vals;
2887   ptr_idxs = recv_buffer_idxs;
2888   for (i=0;i<n_recvs;i++) {
2889     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2890       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2891       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2892       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2893       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2894       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2895     }
2896     ptr_idxs += olengths_idxs[i];
2897     ptr_vals += olengths_vals[i];
2898   }
2899   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2900   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2901   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2902   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2903 
2904   { /* check */
2905     Vec       lvec,rvec;
2906     PetscReal infty_error;
2907 
2908     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2909     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2910     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2911     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2912     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2913     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2914     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2915     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2916     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2917   }
2918 
2919   /* free workspace */
2920   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2921   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2922   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2923   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2924   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2925   if (isdense) {
2926     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2927     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2928   } else {
2929     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2930   }
2931   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2932   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2933   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2934   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2935   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2936   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2937   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2938   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2939   ierr = PetscFree(onodes);CHKERRQ(ierr);
2940   /* get back new mat */
2941   *mat_n = new_mat;
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #undef __FUNCT__
2946 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2947 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2948 {
2949   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2950   PC_IS                  *pcis = (PC_IS*)pc->data;
2951   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2952   MatNullSpace           CoarseNullSpace=NULL;
2953   ISLocalToGlobalMapping coarse_islg;
2954   IS                     coarse_is;
2955   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2956   PetscInt               im_active=-1,active_procs=-1;
2957   PC                     pc_temp;
2958   PCType                 coarse_pc_type;
2959   KSPType                coarse_ksp_type;
2960   PetscBool              multilevel_requested,multilevel_allowed;
2961   PetscBool              setsym,issym,isbddc,isnn;
2962   MatStructure           matstruct;
2963   PetscErrorCode         ierr;
2964 
2965   PetscFunctionBegin;
2966   /* Assign global numbering to coarse dofs */
2967   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2968 
2969   /* infer some info from user */
2970   issym = PETSC_FALSE;
2971   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2972   multilevel_allowed = PETSC_FALSE;
2973   multilevel_requested = PETSC_FALSE;
2974   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2975   if (multilevel_requested) {
2976     /* count "active processes" */
2977     im_active = !!(pcis->n);
2978     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2979     if (active_procs/pcbddc->coarsening_ratio < 2) {
2980       multilevel_allowed = PETSC_FALSE;
2981     } else {
2982       multilevel_allowed = PETSC_TRUE;
2983     }
2984   }
2985 
2986   /* set defaults for coarse KSP and PC */
2987   if (multilevel_allowed) {
2988     if (issym) {
2989       coarse_ksp_type = KSPRICHARDSON;
2990     } else {
2991       coarse_ksp_type = KSPCHEBYSHEV;
2992     }
2993     coarse_pc_type = PCBDDC;
2994   } else {
2995     coarse_ksp_type = KSPPREONLY;
2996     coarse_pc_type = PCREDUNDANT;
2997   }
2998 
2999   /* create the coarse KSP object only once with defaults */
3000   if (!pcbddc->coarse_ksp) {
3001     char prefix[256],str_level[3];
3002     size_t len;
3003     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3004     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3005     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3006     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3007     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3008     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3009     /* prefix */
3010     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3011     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3012     if (!pcbddc->current_level) {
3013       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3014       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3015     } else {
3016       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3017       if (pcbddc->current_level>1) len -= 2;
3018       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3019       *(prefix+len)='\0';
3020       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3021       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3022     }
3023     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3024   }
3025   /* allow user customization */
3026   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3027   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3028   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3029 
3030   /* get some info after set from options */
3031   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3032   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3033   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3034   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3035     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3036     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3037     isbddc = PETSC_FALSE;
3038   }
3039 
3040   /* propagate BDDC info to the next level */
3041   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3042   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3043   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3044 
3045   /* creates temporary MATIS object for coarse matrix */
3046   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3047   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3048   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3049   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3050   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3051   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3054   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3055   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3056 
3057   /* assemble coarse matrix */
3058   if (isbddc || isnn) {
3059     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3060   } else {
3061     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3062   }
3063   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3064 
3065   /* create local to global scatters for coarse problem */
3066   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3067   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3068   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3069 
3070   /* propagate symmetry info to coarse matrix */
3071   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3072 
3073   /* Compute coarse null space (special handling by BDDC only) */
3074   if (pcbddc->NullSpace) {
3075     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3076     if (isbddc) {
3077       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3078     } else {
3079       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3080     }
3081   }
3082 
3083   /* set operators */
3084   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3085   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3086 
3087   /* additional KSP customization */
3088   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3089   if (max_it < 5) {
3090     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3091   }
3092   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3093 
3094 
3095   /* print some info if requested */
3096   if (pcbddc->dbg_flag) {
3097     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3098     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3099     if (!multilevel_allowed) {
3100       if (multilevel_requested) {
3101         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3102       } else if (pcbddc->max_levels) {
3103         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3104       }
3105     }
3106     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3107     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3108   }
3109 
3110   /* setup coarse ksp */
3111   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3112   if (pcbddc->dbg_flag) {
3113     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3114     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3115     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3116     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3117   }
3118 
3119   /* Check coarse problem if requested */
3120   if (pcbddc->dbg_flag) {
3121     KSP       check_ksp;
3122     KSPType   check_ksp_type;
3123     PC        check_pc;
3124     Vec       check_vec;
3125     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3126     PetscInt  its;
3127     PetscBool ispreonly,compute;
3128 
3129     /* Create ksp object suitable for estimation of extreme eigenvalues */
3130     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3131     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3132     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3133     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3134     if (ispreonly) {
3135       check_ksp_type = KSPPREONLY;
3136       compute = PETSC_FALSE;
3137     } else {
3138       if (issym) check_ksp_type = KSPCG;
3139       else check_ksp_type = KSPGMRES;
3140       compute = PETSC_TRUE;
3141     }
3142     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3143     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3144     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3145     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3146     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3147     /* create random vec */
3148     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3149     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3150     if (CoarseNullSpace) {
3151       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3152     }
3153     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3154     /* solve coarse problem */
3155     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3156     if (CoarseNullSpace) {
3157       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3158     }
3159     /* check coarse problem residual error */
3160     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3161     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3162     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3163     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3164     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3165     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3166     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3167     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3168     /* get eigenvalue estimation if preonly has not been requested */
3169     if (!ispreonly) {
3170       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3171       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3172       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3173     }
3174     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3175     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3176   }
3177   /* free memory */
3178   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3179   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3180   PetscFunctionReturn(0);
3181 }
3182 
3183 #undef __FUNCT__
3184 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3185 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3186 {
3187   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3188   PC_IS*         pcis = (PC_IS*)pc->data;
3189   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3190   PetscInt       i,j,coarse_size;
3191   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3192   PetscErrorCode ierr;
3193 
3194   PetscFunctionBegin;
3195   /* get indices in local ordering for vertices and constraints */
3196   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3197   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3198   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3199   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3200   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3201   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3202   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3203 
3204   /* Compute global number of coarse dofs */
3205   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3206 
3207   /* check numbering */
3208   if (pcbddc->dbg_flag) {
3209     PetscScalar coarsesum,*array;
3210 
3211     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3212     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3213     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3214     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3215     for (i=0;i<pcbddc->local_primal_size;i++) {
3216       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3217     }
3218     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3219     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3220     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3221     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3222     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3223     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3224     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3225     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3226     for (i=0;i<pcis->n;i++) {
3227       if (array[i] == 1.0) {
3228         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3229       }
3230     }
3231     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3232     for (i=0;i<pcis->n;i++) {
3233       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3234     }
3235     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3236     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3237     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3238     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3239     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3240     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3241     if (pcbddc->dbg_flag > 1) {
3242       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3243       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3244       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3245       for (i=0;i<pcbddc->local_primal_size;i++) {
3246         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3247       }
3248       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3249     }
3250     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3251   }
3252   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3253   /* get back data */
3254   *coarse_size_n = coarse_size;
3255   *local_primal_indices_n = local_primal_indices;
3256   PetscFunctionReturn(0);
3257 }
3258 
3259