xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision dc359a40b44efbd9b64b0b4f1b55ec9ba9d227e8)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
166   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183 
184   /* Allocating some extra storage just to be safe */
185   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
186   for (i=0;i<pcis->n;i++) auxindices[i]=i;
187 
188   /* vertices in boundary numbering */
189   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
191   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
193   if (i != n_vertices) {
194     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
195   }
196 
197   /* Precompute stuffs needed for preprocessing and application of BDDC*/
198   if (n_constraints) {
199     /* work vectors */
200     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
201     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
202     /* auxiliary matrices */
203     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
204     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
205     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
206     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
207 
208     /* Extract constraints on R nodes: C_{CR}  */
209     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
210     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
211     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
212 
213     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
214     for (i=0;i<n_constraints;i++) {
215       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
216       /* Get row of constraint matrix in R numbering */
217       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
219       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
221       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
222       /* Solve for row of constraint matrix in R numbering */
223       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
224       /* Set values in local_auxmat2 */
225       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
227       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228     }
229     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
232 
233     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
234     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
235     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
236     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
237     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
238     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
239     ierr = MatSetUp(M1);CHKERRQ(ierr);
240     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
241     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
242     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
243     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
244     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
245     ierr = MatDestroy(&M2);CHKERRQ(ierr);
246     ierr = MatDestroy(&M3);CHKERRQ(ierr);
247     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
248     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
249   }
250 
251   /* Get submatrices from subdomain matrix */
252   if (n_vertices) {
253     PetscInt ibs,mbs;
254     PetscBool issbaij;
255     Mat newmat;
256 
257     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
258     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
259     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
260     if (ibs != mbs) { /* need to convert to SEQAIJ */
261       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
263       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
265       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
266     } else {
267       /* this is safe */
268       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
269       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
270       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
271         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272         /* which of the two approaches is faster? */
273         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
275         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
277         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
278       } else {
279         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
280         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
281       }
282     }
283     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
284     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
285     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
286   }
287 
288   /* Matrix of coarse basis functions (local) */
289   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
290   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
291   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
292   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
293   if (pcbddc->switch_static || pcbddc->dbg_flag) {
294     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
295     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
296     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
297     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
298   }
299 
300   if (pcbddc->dbg_flag) {
301     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
303     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
304   }
305   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
306   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
307 
308   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
309 
310   /* vertices */
311   for (i=0;i<n_vertices;i++) {
312     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
313     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
314     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
315     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
317     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
318     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
319     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
320     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
321     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
322     if (n_constraints) {
323       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
324       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
325       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
326     }
327     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
328     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
329 
330     /* Set values in coarse basis function and subdomain part of coarse_mat */
331     /* coarse basis functions */
332     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
333     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
335     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
337     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
339     if (pcbddc->switch_static || pcbddc->dbg_flag) {
340       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
341       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
342       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345     }
346     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
347     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
349     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     if (n_constraints) {
351       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354     }
355 
356     /* check */
357     if (pcbddc->dbg_flag) {
358       /* assemble subdomain vector on local nodes */
359       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
365       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
366       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
367       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
368       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
370       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       if (n_constraints) {
372         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
373         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
374         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
375       }
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
379       /* check saddle point solution */
380       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
381       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
382       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
383       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
384       /* shift by the identity matrix */
385       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
386       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
387       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   /* constraints */
393   for (i=0;i<n_constraints;i++) {
394     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
395     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
397     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
398     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
399     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
400     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
401     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
402     if (n_vertices) {
403       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
404     }
405     /* Set values in coarse basis function and subdomain part of coarse_mat */
406     /* coarse basis functions */
407     j = i+n_vertices; /* don't touch this! */
408     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
409     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
410     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
411     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     if (pcbddc->switch_static || pcbddc->dbg_flag) {
415       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420     }
421     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
422     if (n_vertices) {
423       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     }
427     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
428     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
429     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
430 
431     if (pcbddc->dbg_flag) {
432       /* assemble subdomain vector on nodes */
433       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
434       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
439       /* assemble subdomain vector of lagrange multipliers */
440       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
441       if (n_vertices) {
442         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
443         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
444         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
445       }
446       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
448       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
452       /* check saddle point solution */
453       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
454       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
455       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
456       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
457       /* shift by the identity matrix */
458       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
459       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
460       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
462     }
463   }
464   /* call assembling routines for local coarse basis */
465   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   if (pcbddc->switch_static || pcbddc->dbg_flag) {
468     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471 
472   /* compute other basis functions for non-symmetric problems */
473   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
474   if (!setsym || (setsym && !issym)) {
475     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
477     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
478     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
481       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
482       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
483       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
484     }
485     for (i=0;i<pcbddc->local_primal_size;i++) {
486       if (n_constraints) {
487         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
488         for (j=0;j<n_constraints;j++) {
489           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
490         }
491         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
492         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
493       }
494       if (i<n_vertices) {
495         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
496         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
497         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
498         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
499         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
500         if (n_constraints) {
501           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
502         }
503       } else {
504         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
505       }
506       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       if (i<n_vertices) {
514         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
515       }
516       if (pcbddc->switch_static || pcbddc->dbg_flag) {
517         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522       }
523 
524       if (pcbddc->dbg_flag) {
525         /* assemble subdomain vector on nodes */
526         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
527         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
529         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         if (i<n_vertices) {
531           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
532         }
533         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
534         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
535         /* assemble subdomain vector of lagrange multipliers */
536         for (j=0;j<pcbddc->local_primal_size;j++) {
537           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
538         }
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         /* check saddle point solution */
542         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
543         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
544         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
545         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
546         /* shift by the identity matrix */
547         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
548         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
549         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
550         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
551       }
552     }
553     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     if (pcbddc->switch_static || pcbddc->dbg_flag) {
556       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     }
559     unsymmetric_check = PETSC_TRUE;
560   } else { /* take references to already computed coarse basis */
561     unsymmetric_check = PETSC_FALSE;
562     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
563     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
564     if (pcbddc->coarse_phi_D) {
565       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
566       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
567     }
568   }
569   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
570   /* Checking coarse_sub_mat and coarse basis functios */
571   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
572   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
573   if (pcbddc->dbg_flag) {
574     Mat         coarse_sub_mat;
575     Mat         AUXMAT,TM1,TM2,TM3,TM4;
576     Mat         coarse_phi_D,coarse_phi_B;
577     Mat         coarse_psi_D,coarse_psi_B;
578     Mat         A_II,A_BB,A_IB,A_BI;
579     MatType     checkmattype=MATSEQAIJ;
580     PetscReal   real_value;
581 
582     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
583     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
584     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
585     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
586     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
587     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
588     if (unsymmetric_check) {
589       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
590       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
591     }
592     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
593 
594     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
596     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
597     if (unsymmetric_check) {
598       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
602       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
608       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
610     } else {
611       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
612       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
613       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
614       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
615       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
616       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
617       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
618       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
619     }
620     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
621     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
622     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
623     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
624     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
625     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
626     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
627     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
628     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
629     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
630     for (i=0;i<pcbddc->local_primal_size;i++) {
631       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
632     }
633     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
634     for (i=0;i<pcbddc->local_primal_size;i++) {
635       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
636     }
637     if (unsymmetric_check) {
638       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
639       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
640         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
641       }
642       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
643       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
644         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
645       }
646     }
647     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
648     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
649     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
652     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
653     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
654     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
655     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
656     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
657     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
658     if (unsymmetric_check) {
659       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
660       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
661     }
662     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
663     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
664     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
665     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
666   }
667   /* free memory */
668   if (n_vertices) {
669     ierr = PetscFree(vertices);CHKERRQ(ierr);
670     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
671     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
672     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
673     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
674     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
675   }
676   if (n_constraints) {
677     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
678     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
679     ierr = MatDestroy(&M1);CHKERRQ(ierr);
680     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
681   }
682   ierr = PetscFree(auxindices);CHKERRQ(ierr);
683   /* get back data */
684   *coarse_submat_vals_n = coarse_submat_vals;
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
691 {
692   PC_IS*            pcis = (PC_IS*)(pc->data);
693   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
694   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
695   PetscBool         issbaij,isbaij;
696   /* manage repeated solves */
697   MatReuse          reuse;
698   MatStructure      matstruct;
699   PetscErrorCode    ierr;
700 
701   PetscFunctionBegin;
702   /* get mat flags */
703   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
704   reuse = MAT_INITIAL_MATRIX;
705   if (pc->setupcalled) {
706     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
707     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
708     if (matstruct == SAME_NONZERO_PATTERN) {
709       reuse = MAT_REUSE_MATRIX;
710     } else {
711       reuse = MAT_INITIAL_MATRIX;
712     }
713   }
714   if (reuse == MAT_INITIAL_MATRIX) {
715     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
716     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
717     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
718     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
719     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
720   }
721 
722   /* transform local matrices if needed */
723   if (pcbddc->use_change_of_basis) {
724     Mat         change_mat_all;
725     PetscScalar *row_cmat_values;
726     PetscInt    *row_cmat_indices;
727     PetscInt    *nnz,*is_indices,*temp_indices;
728     PetscInt    i,j,k,n_D,n_B;
729 
730     /* Get Non-overlapping dimensions */
731     n_B = pcis->n_B;
732     n_D = pcis->n-n_B;
733 
734     /* compute nonzero structure of change of basis on all local nodes */
735     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
736     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
738     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     k=1;
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
743       nnz[is_indices[i]]=j;
744       if (k < j) k = j;
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
746     }
747     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
748     /* assemble change of basis matrix on the whole set of local dofs */
749     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
750     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
751     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
752     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
753     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
754     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
755     for (i=0;i<n_D;i++) {
756       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
757     }
758     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
759     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
760     for (i=0;i<n_B;i++) {
761       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
762       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
763       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
764       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
765     }
766     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
768     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
769     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
770     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
771     if (!issbaij && !isbaij) {
772       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
773     } else {
774       Mat work_mat;
775       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
776       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
777       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
778     }
779     /*
780     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
781     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
782     */
783     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
784     ierr = PetscFree(nnz);CHKERRQ(ierr);
785     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
786   } else {
787     /* without change of basis, the local matrix is unchanged */
788     if (!pcbddc->local_mat) {
789       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
790       pcbddc->local_mat = matis->A;
791     }
792   }
793 
794   /* get submatrices */
795   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
796   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
797   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
798   if (!issbaij) {
799     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
800     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
801   } else {
802     Mat newmat;
803     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
804     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
805     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
806     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
814 {
815   PC_IS*         pcis = (PC_IS*)(pc->data);
816   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
817   IS             is_aux1,is_aux2;
818   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
819   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
820   PetscInt       vbs,bs;
821   PetscBT        bitmask;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   /* Set Non-overlapping dimensions */
826   n_B = pcis->n_B; n_D = pcis->n - n_B;
827   /* get vertex indices from constraint matrix */
828   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
829 
830   /* create auxiliary bitmask */
831   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
832   for (i=0;i<n_vertices;i++) {
833     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
834   }
835 
836   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
837   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
838   for (i=0, n_R=0; i<pcis->n; i++) {
839     if (!PetscBTLookup(bitmask,i)) {
840       idx_R_local[n_R] = i;
841       n_R++;
842     }
843   }
844 
845   /* Block code */
846   vbs = 1;
847   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
848   if (bs>1 && !(n_vertices%bs)) {
849     PetscBool is_blocked = PETSC_TRUE;
850     PetscInt  *vary;
851     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
852     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
853     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
854     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
855     for (i=0; i<n_vertices; i++) {
856       if (vary[i]!=0 && vary[i]!=bs) {
857         is_blocked = PETSC_FALSE;
858         break;
859       }
860     }
861     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
862       vbs = bs;
863       for (i=0;i<n_R/vbs;i++) {
864         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
865       }
866     }
867     ierr = PetscFree(vary);CHKERRQ(ierr);
868   }
869   ierr = PetscFree(vertices);CHKERRQ(ierr);
870   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
871   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
872 
873   /* print some info if requested */
874   if (pcbddc->dbg_flag) {
875     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
876     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
877     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
878     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
879     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
880     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
881     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
882   }
883 
884   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
885   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
886   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
887   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
888   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
889   for (i=0; i<n_D; i++) {
890     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
891   }
892   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
893   for (i=0, j=0; i<n_R; i++) {
894     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
895       aux_array1[j++] = i;
896     }
897   }
898   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
899   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
900   for (i=0, j=0; i<n_B; i++) {
901     if (!PetscBTLookup(bitmask,is_indices[i])) {
902       aux_array2[j++] = i;
903     }
904   }
905   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
906   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
907   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
908   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
909   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
910 
911   if (pcbddc->switch_static || pcbddc->dbg_flag) {
912     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
913     for (i=0, j=0; i<n_R; i++) {
914       if (PetscBTLookup(bitmask,idx_R_local[i])) {
915         aux_array1[j++] = i;
916       }
917     }
918     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
919     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
920     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
921   }
922   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
923   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
931 {
932   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
933   PC_IS          *pcis = (PC_IS*)pc->data;
934   PC             pc_temp;
935   Mat            A_RR;
936   Vec            vec1,vec2,vec3;
937   MatStructure   matstruct;
938   PetscScalar    m_one = -1.0;
939   PetscReal      value;
940   PetscInt       n_D,n_R,ibs,mbs;
941   PetscBool      use_exact,use_exact_reduced,issbaij;
942   PetscErrorCode ierr;
943   /* prefixes stuff */
944   char           dir_prefix[256],neu_prefix[256],str_level[3];
945   size_t         len;
946 
947   PetscFunctionBegin;
948   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
949 
950   /* compute prefixes */
951   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
952   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
953   if (!pcbddc->current_level) {
954     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
955     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
956     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
957     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
958   } else {
959     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
960     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
961     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
962     len -= 15; /* remove "pc_bddc_coarse_" */
963     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
964     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
965     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
966     *(dir_prefix+len)='\0';
967     *(neu_prefix+len)='\0';
968     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
969     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
970     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
971     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
972   }
973 
974   /* DIRICHLET PROBLEM */
975   /* Matrix for Dirichlet problem is pcis->A_II */
976   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
977   if (!pcbddc->ksp_D) { /* create object if not yet build */
978     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
979     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
980     /* default */
981     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
982     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
983     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
984     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
985     if (issbaij) {
986       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
987     } else {
988       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
989     }
990     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
991   }
992   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
993   /* Allow user's customization */
994   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
995   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
996   if (!n_D) {
997     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
998     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
999   }
1000   /* Set Up KSP for Dirichlet problem of BDDC */
1001   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1002   /* set ksp_D into pcis data */
1003   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1004   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1005   pcis->ksp_D = pcbddc->ksp_D;
1006 
1007   /* NEUMANN PROBLEM */
1008   /* Matrix for Neumann problem is A_RR -> we need to create it */
1009   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1010   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1011   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1012   if (ibs != mbs) {
1013     Mat newmat;
1014     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1015     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1016     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1017   } else {
1018     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1019   }
1020   if (!pcbddc->ksp_R) { /* create object if not yet build */
1021     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1022     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1023     /* default */
1024     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1025     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1026     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1027     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1028     if (issbaij) {
1029       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1030     } else {
1031       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1032     }
1033     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1034   }
1035   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1036   /* Allow user's customization */
1037   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1038   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1039   if (!n_R) {
1040     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1041     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1042   }
1043   /* Set Up KSP for Neumann problem of BDDC */
1044   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1045 
1046   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1047 
1048   /* Dirichlet */
1049   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1050   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1051   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1052   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1053   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1054   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1055   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1056   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1057   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1058   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1059   /* need to be adapted? */
1060   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1061   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1062   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1063   /* print info */
1064   if (pcbddc->dbg_flag) {
1065     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1066     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1067     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1068     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1069     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1070   }
1071   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1072     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1073   }
1074 
1075   /* Neumann */
1076   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1077   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1078   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1079   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1080   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1081   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1082   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1083   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1084   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1085   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1086   /* need to be adapted? */
1087   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1088   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1089   /* print info */
1090   if (pcbddc->dbg_flag) {
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1092     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1093   }
1094   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1095     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1096   }
1097   /* free Neumann problem's matrix */
1098   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1104 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc)
1105 {
1106   PetscErrorCode ierr;
1107   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1108 
1109   PetscFunctionBegin;
1110   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1111   if (pcbddc->local_auxmat1) {
1112     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1113     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1119 #undef __FUNCT__
1120 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1121 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1122 {
1123   PetscErrorCode ierr;
1124   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1125   PC_IS*            pcis = (PC_IS*)  (pc->data);
1126   const PetscScalar zero = 0.0;
1127 
1128   PetscFunctionBegin;
1129   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1130   if (applytranspose) {
1131     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1132     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1133   } else {
1134     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1135     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1136   }
1137   /* Scatter data of coarse_rhs */
1138   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1139   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140 
1141   /* Local solution on R nodes */
1142   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1143   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1144   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   if (pcbddc->switch_static) {
1146     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148   }
1149   ierr = PCBDDCSolveSubstructureCorrection(pc);CHKERRQ(ierr);
1150   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1151   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1152   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1153   if (pcbddc->switch_static) {
1154     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1156   }
1157 
1158   /* Coarse solution */
1159   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1161     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1162   }
1163   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1164   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165 
1166   /* Sum contributions from two levels */
1167   if (applytranspose) {
1168     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1169     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1170   } else {
1171     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1172     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1179 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1180 {
1181   PetscErrorCode ierr;
1182   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1183 
1184   PetscFunctionBegin;
1185   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1191 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1192 {
1193   PetscErrorCode ierr;
1194   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1195 
1196   PetscFunctionBegin;
1197   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 /* uncomment for testing purposes */
1202 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1203 #undef __FUNCT__
1204 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1205 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1206 {
1207   PetscErrorCode    ierr;
1208   PC_IS*            pcis = (PC_IS*)(pc->data);
1209   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1210   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1211   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1212   MatType           impMatType=MATSEQAIJ;
1213   /* one and zero */
1214   PetscScalar       one=1.0,zero=0.0;
1215   /* space to store constraints and their local indices */
1216   PetscScalar       *temp_quadrature_constraint;
1217   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1218   /* iterators */
1219   PetscInt          i,j,k,total_counts,temp_start_ptr;
1220   /* stuff to store connected components stored in pcbddc->mat_graph */
1221   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1222   PetscInt          n_ISForFaces,n_ISForEdges;
1223   /* near null space stuff */
1224   MatNullSpace      nearnullsp;
1225   const Vec         *nearnullvecs;
1226   Vec               *localnearnullsp;
1227   PetscBool         nnsp_has_cnst;
1228   PetscInt          nnsp_size;
1229   PetscScalar       *array;
1230   /* BLAS integers */
1231   PetscBLASInt      lwork,lierr;
1232   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1233   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1234   /* LAPACK working arrays for SVD or POD */
1235   PetscBool         skip_lapack;
1236   PetscScalar       *work;
1237   PetscReal         *singular_vals;
1238 #if defined(PETSC_USE_COMPLEX)
1239   PetscReal         *rwork;
1240 #endif
1241 #if defined(PETSC_MISSING_LAPACK_GESVD)
1242   PetscBLASInt      Blas_one_2=1;
1243   PetscScalar       *temp_basis,*correlation_mat;
1244 #else
1245   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1246   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1247 #endif
1248   /* change of basis */
1249   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1250   PetscBool         boolforchange,qr_needed;
1251   PetscBT           touched,change_basis,qr_needed_idx;
1252   /* auxiliary stuff */
1253   PetscInt          *nnz,*is_indices,*local_to_B;
1254   /* some quantities */
1255   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1256   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1257 
1258 
1259   PetscFunctionBegin;
1260   /* Get index sets for faces, edges and vertices from graph */
1261   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1262     pcbddc->use_vertices = PETSC_TRUE;
1263   }
1264   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1265   /* HACK: provide functions to set change of basis */
1266   if (!ISForVertices && pcbddc->NullSpace) {
1267     pcbddc->use_change_of_basis = PETSC_TRUE;
1268     pcbddc->use_change_on_faces = PETSC_FALSE;
1269   }
1270   /* print some info */
1271   if (pcbddc->dbg_flag) {
1272     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1273     i = 0;
1274     if (ISForVertices) {
1275       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1278     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1279     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1280     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1281   }
1282   /* check if near null space is attached to global mat */
1283   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1284   if (nearnullsp) {
1285     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1286   } else { /* if near null space is not provided BDDC uses constants by default */
1287     nnsp_size = 0;
1288     nnsp_has_cnst = PETSC_TRUE;
1289   }
1290   /* get max number of constraints on a single cc */
1291   max_constraints = nnsp_size;
1292   if (nnsp_has_cnst) max_constraints++;
1293 
1294   /*
1295        Evaluate maximum storage size needed by the procedure
1296        - temp_indices will contain start index of each constraint stored as follows
1297        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1298        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1299        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1300                                                                                                                                                          */
1301   total_counts = n_ISForFaces+n_ISForEdges;
1302   total_counts *= max_constraints;
1303   n_vertices = 0;
1304   if (ISForVertices) {
1305     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1306   }
1307   total_counts += n_vertices;
1308   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1309   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1310   total_counts = 0;
1311   max_size_of_constraint = 0;
1312   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1313     if (i<n_ISForEdges) {
1314       used_IS = &ISForEdges[i];
1315     } else {
1316       used_IS = &ISForFaces[i-n_ISForEdges];
1317     }
1318     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1319     total_counts += j;
1320     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1321   }
1322   total_counts *= max_constraints;
1323   total_counts += n_vertices;
1324   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1325   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1326   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1327   /* local to boundary numbering */
1328   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1329   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1330   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1331   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1332   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1333   /* get local part of global near null space vectors */
1334   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1335   for (k=0;k<nnsp_size;k++) {
1336     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1337     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   }
1340 
1341   /* whether or not to skip lapack calls */
1342   skip_lapack = PETSC_TRUE;
1343   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1344 
1345   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1346   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1347     PetscScalar temp_work;
1348 #if defined(PETSC_MISSING_LAPACK_GESVD)
1349     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1350     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1351     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1352     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1353 #if defined(PETSC_USE_COMPLEX)
1354     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1355 #endif
1356     /* now we evaluate the optimal workspace using query with lwork=-1 */
1357     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1358     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1359     lwork = -1;
1360     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1361 #if !defined(PETSC_USE_COMPLEX)
1362     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1363 #else
1364     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1365 #endif
1366     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1367     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1368 #else /* on missing GESVD */
1369     /* SVD */
1370     PetscInt max_n,min_n;
1371     max_n = max_size_of_constraint;
1372     min_n = max_constraints;
1373     if (max_size_of_constraint < max_constraints) {
1374       min_n = max_size_of_constraint;
1375       max_n = max_constraints;
1376     }
1377     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1378 #if defined(PETSC_USE_COMPLEX)
1379     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1380 #endif
1381     /* now we evaluate the optimal workspace using query with lwork=-1 */
1382     lwork = -1;
1383     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1384     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1385     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1386     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1387 #if !defined(PETSC_USE_COMPLEX)
1388     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1389 #else
1390     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1391 #endif
1392     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1393     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1394 #endif /* on missing GESVD */
1395     /* Allocate optimal workspace */
1396     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1397     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1398   }
1399   /* Now we can loop on constraining sets */
1400   total_counts = 0;
1401   temp_indices[0] = 0;
1402   /* vertices */
1403   if (ISForVertices) {
1404     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1405     if (nnsp_has_cnst) { /* consider all vertices */
1406       for (i=0;i<n_vertices;i++) {
1407         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1408         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1409         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1410         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1411         total_counts++;
1412       }
1413     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1414       PetscBool used_vertex;
1415       for (i=0;i<n_vertices;i++) {
1416         used_vertex = PETSC_FALSE;
1417         k = 0;
1418         while (!used_vertex && k<nnsp_size) {
1419           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1420           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1421             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1422             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1423             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1424             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1425             total_counts++;
1426             used_vertex = PETSC_TRUE;
1427           }
1428           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1429           k++;
1430         }
1431       }
1432     }
1433     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1434     n_vertices = total_counts;
1435   }
1436 
1437   /* edges and faces */
1438   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1439     if (i<n_ISForEdges) {
1440       used_IS = &ISForEdges[i];
1441       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1442     } else {
1443       used_IS = &ISForFaces[i-n_ISForEdges];
1444       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1445     }
1446     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1447     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1448     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1449     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1450     /* change of basis should not be performed on local periodic nodes */
1451     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1452     if (nnsp_has_cnst) {
1453       PetscScalar quad_value;
1454       temp_constraints++;
1455       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1456       for (j=0;j<size_of_constraint;j++) {
1457         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1458         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1459         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1460       }
1461       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1462       total_counts++;
1463     }
1464     for (k=0;k<nnsp_size;k++) {
1465       PetscReal real_value;
1466       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1467       for (j=0;j<size_of_constraint;j++) {
1468         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1469         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1470         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1471       }
1472       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1473       /* check if array is null on the connected component */
1474       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1475       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1476       if (real_value > 0.0) { /* keep indices and values */
1477         temp_constraints++;
1478         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1479         total_counts++;
1480       }
1481     }
1482     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1483     valid_constraints = temp_constraints;
1484     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1485     if (!pcbddc->use_nnsp_true && temp_constraints) {
1486       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1487 
1488 #if defined(PETSC_MISSING_LAPACK_GESVD)
1489       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1490          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1491          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1492             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1493             from that computed using LAPACKgesvd
1494          -> This is due to a different computation of eigenvectors in LAPACKheev
1495          -> The quality of the POD-computed basis will be the same */
1496       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1497       /* Store upper triangular part of correlation matrix */
1498       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1499       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1500       for (j=0;j<temp_constraints;j++) {
1501         for (k=0;k<j+1;k++) {
1502           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1503         }
1504       }
1505       /* compute eigenvalues and eigenvectors of correlation matrix */
1506       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1507       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1508 #if !defined(PETSC_USE_COMPLEX)
1509       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1510 #else
1511       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1512 #endif
1513       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1514       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1515       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1516       j=0;
1517       while (j < temp_constraints && singular_vals[j] < tol) j++;
1518       total_counts=total_counts-j;
1519       valid_constraints = temp_constraints-j;
1520       /* scale and copy POD basis into used quadrature memory */
1521       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1522       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1523       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1524       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1525       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1526       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1527       if (j<temp_constraints) {
1528         PetscInt ii;
1529         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1530         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1531         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1532         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1533         for (k=0;k<temp_constraints-j;k++) {
1534           for (ii=0;ii<size_of_constraint;ii++) {
1535             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1536           }
1537         }
1538       }
1539 #else  /* on missing GESVD */
1540       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1541       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1542       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1543       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1544 #if !defined(PETSC_USE_COMPLEX)
1545       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1546 #else
1547       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1548 #endif
1549       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1550       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1551       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1552       k = temp_constraints;
1553       if (k > size_of_constraint) k = size_of_constraint;
1554       j = 0;
1555       while (j < k && singular_vals[k-j-1] < tol) j++;
1556       total_counts = total_counts-temp_constraints+k-j;
1557       valid_constraints = k-j;
1558 #endif /* on missing GESVD */
1559     }
1560     /* setting change_of_basis flag is safe now */
1561     if (boolforchange) {
1562       for (j=0;j<valid_constraints;j++) {
1563         PetscBTSet(change_basis,total_counts-j-1);
1564       }
1565     }
1566   }
1567   /* free index sets of faces, edges and vertices */
1568   for (i=0;i<n_ISForFaces;i++) {
1569     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1570   }
1571   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1572   for (i=0;i<n_ISForEdges;i++) {
1573     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1574   }
1575   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1576   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1577 
1578   /* free workspace */
1579   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1580     ierr = PetscFree(work);CHKERRQ(ierr);
1581 #if defined(PETSC_USE_COMPLEX)
1582     ierr = PetscFree(rwork);CHKERRQ(ierr);
1583 #endif
1584     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1585 #if defined(PETSC_MISSING_LAPACK_GESVD)
1586     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1587     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1588 #endif
1589   }
1590   for (k=0;k<nnsp_size;k++) {
1591     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1592   }
1593   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1594 
1595   /* set quantities in pcbddc data structure */
1596   /* n_vertices defines the number of subdomain corners in the primal space */
1597   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1598   pcbddc->local_primal_size = total_counts;
1599   pcbddc->n_vertices = n_vertices;
1600   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1601 
1602   /* Create constraint matrix */
1603   /* The constraint matrix is used to compute the l2g map of primal dofs */
1604   /* so we need to set it up properly either with or without change of basis */
1605   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1606   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1607   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1608   /* array to compute a local numbering of constraints : vertices first then constraints */
1609   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1610   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1611   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1612   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1613   /* auxiliary stuff for basis change */
1614   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1615   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1616 
1617   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1618   total_primal_vertices=0;
1619   for (i=0;i<pcbddc->local_primal_size;i++) {
1620     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1621     if (size_of_constraint == 1) {
1622       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1623       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1624       aux_primal_minloc[total_primal_vertices]=0;
1625       total_primal_vertices++;
1626     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1627       PetscInt min_loc,min_index;
1628       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1629       /* find first untouched local node */
1630       k = 0;
1631       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1632       min_index = global_indices[k];
1633       min_loc = k;
1634       /* search the minimum among global nodes already untouched on the cc */
1635       for (k=1;k<size_of_constraint;k++) {
1636         /* there can be more than one constraint on a single connected component */
1637         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1638           min_index = global_indices[k];
1639           min_loc = k;
1640         }
1641       }
1642       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1643       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1644       aux_primal_minloc[total_primal_vertices]=min_loc;
1645       total_primal_vertices++;
1646     }
1647   }
1648   /* determine if a QR strategy is needed for change of basis */
1649   qr_needed = PETSC_FALSE;
1650   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1651   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1652     if (PetscBTLookup(change_basis,i)) {
1653       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1654       j = 0;
1655       for (k=0;k<size_of_constraint;k++) {
1656         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1657           j++;
1658         }
1659       }
1660       /* found more than one primal dof on the cc */
1661       if (j > 1) {
1662         PetscBTSet(qr_needed_idx,i);
1663         qr_needed = PETSC_TRUE;
1664       }
1665     }
1666   }
1667   /* free workspace */
1668   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1669 
1670   /* permute indices in order to have a sorted set of vertices */
1671   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1672 
1673   /* nonzero structure of constraint matrix */
1674   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1675   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1676   j=total_primal_vertices;
1677   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1678     if (!PetscBTLookup(change_basis,i)) {
1679       nnz[j]=temp_indices[i+1]-temp_indices[i];
1680       j++;
1681     }
1682   }
1683   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1684   ierr = PetscFree(nnz);CHKERRQ(ierr);
1685   /* set values in constraint matrix */
1686   for (i=0;i<total_primal_vertices;i++) {
1687     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1688   }
1689   total_counts = total_primal_vertices;
1690   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1691     if (!PetscBTLookup(change_basis,i)) {
1692       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1693       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1694       total_counts++;
1695     }
1696   }
1697   /* assembling */
1698   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   /*
1701   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1702   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1703   */
1704   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1705   if (pcbddc->use_change_of_basis) {
1706     /* dual and primal dofs on a single cc */
1707     PetscInt     dual_dofs,primal_dofs;
1708     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1709     PetscInt     primal_counter;
1710     /* working stuff for GEQRF */
1711     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1712     PetscBLASInt lqr_work;
1713     /* working stuff for UNGQR */
1714     PetscScalar  *gqr_work,lgqr_work_t;
1715     PetscBLASInt lgqr_work;
1716     /* working stuff for TRTRS */
1717     PetscScalar  *trs_rhs;
1718     PetscBLASInt Blas_NRHS;
1719     /* pointers for values insertion into change of basis matrix */
1720     PetscInt     *start_rows,*start_cols;
1721     PetscScalar  *start_vals;
1722     /* working stuff for values insertion */
1723     PetscBT      is_primal;
1724 
1725     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1726     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1727     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1728     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1729     /* work arrays */
1730     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1731     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1732     /* nonzeros per row */
1733     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1734       if (PetscBTLookup(change_basis,i)) {
1735         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1736         if (PetscBTLookup(qr_needed_idx,i)) {
1737           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1738         } else {
1739           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1740           /* get local primal index on the cc */
1741           j = 0;
1742           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1743           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1744         }
1745       }
1746     }
1747     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1748     ierr = PetscFree(nnz);CHKERRQ(ierr);
1749     /* Set initial identity in the matrix */
1750     for (i=0;i<pcis->n_B;i++) {
1751       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1752     }
1753 
1754     if (pcbddc->dbg_flag) {
1755       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1756       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1757     }
1758 
1759 
1760     /* Now we loop on the constraints which need a change of basis */
1761     /*
1762        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1763        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1764 
1765        Basic blocks of change of basis matrix T computed
1766 
1767           - Using the following block transformation if there is only a primal dof on the cc
1768             (in the example, primal dof is the last one of the edge in LOCAL ordering
1769              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1770             | 1        0   ...        0     1 |
1771             | 0        1   ...        0     1 |
1772             |              ...                |
1773             | 0        ...            1     1 |
1774             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1775 
1776           - via QR decomposition of constraints otherwise
1777     */
1778     if (qr_needed) {
1779       /* space to store Q */
1780       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1781       /* first we issue queries for optimal work */
1782       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1783       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1784       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1785       lqr_work = -1;
1786       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1787       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1788       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1789       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1790       lgqr_work = -1;
1791       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1792       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1793       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1794       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1795       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1796       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1797       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1798       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1799       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1800       /* array to store scaling factors for reflectors */
1801       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1802       /* array to store rhs and solution of triangular solver */
1803       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1804       /* allocating workspace for check */
1805       if (pcbddc->dbg_flag) {
1806         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1807       }
1808     }
1809     /* array to store whether a node is primal or not */
1810     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1811     for (i=0;i<total_primal_vertices;i++) {
1812       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1813     }
1814 
1815     /* loop on constraints and see whether or not they need a change of basis and compute it */
1816     /* -> using implicit ordering contained in temp_indices data */
1817     total_counts = pcbddc->n_vertices;
1818     primal_counter = total_counts;
1819     while (total_counts<pcbddc->local_primal_size) {
1820       primal_dofs = 1;
1821       if (PetscBTLookup(change_basis,total_counts)) {
1822         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1823         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1824           primal_dofs++;
1825         }
1826         /* get constraint info */
1827         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1828         dual_dofs = size_of_constraint-primal_dofs;
1829 
1830         if (pcbddc->dbg_flag) {
1831           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1832         }
1833 
1834         if (primal_dofs > 1) { /* QR */
1835 
1836           /* copy quadrature constraints for change of basis check */
1837           if (pcbddc->dbg_flag) {
1838             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1839           }
1840           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1841           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1842 
1843           /* compute QR decomposition of constraints */
1844           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1845           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1846           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1847           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1848           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1849           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1850           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1851 
1852           /* explictly compute R^-T */
1853           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1854           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1855           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1856           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1857           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1858           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1859           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1860           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1861           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1862           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1863 
1864           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1865           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1866           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1868           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1869           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1870           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1871           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1872           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1873 
1874           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1875              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1876              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1877           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1878           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1879           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1880           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1881           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1882           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1883           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1884           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1885           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1886           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1887 
1888           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1889           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1890           /* insert cols for primal dofs */
1891           for (j=0;j<primal_dofs;j++) {
1892             start_vals = &qr_basis[j*size_of_constraint];
1893             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1894             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1895           }
1896           /* insert cols for dual dofs */
1897           for (j=0,k=0;j<dual_dofs;k++) {
1898             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1899               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1900               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1901               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1902               j++;
1903             }
1904           }
1905 
1906           /* check change of basis */
1907           if (pcbddc->dbg_flag) {
1908             PetscInt   ii,jj;
1909             PetscBool valid_qr=PETSC_TRUE;
1910             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1911             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1912             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1913             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1914             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1915             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1916             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1917             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1918             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1919             for (jj=0;jj<size_of_constraint;jj++) {
1920               for (ii=0;ii<primal_dofs;ii++) {
1921                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1922                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1923               }
1924             }
1925             if (!valid_qr) {
1926               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1927               for (jj=0;jj<size_of_constraint;jj++) {
1928                 for (ii=0;ii<primal_dofs;ii++) {
1929                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1930                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1931                   }
1932                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1933                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1934                   }
1935                 }
1936               }
1937             } else {
1938               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1939             }
1940           }
1941         } else { /* simple transformation block */
1942           PetscInt row,col;
1943           PetscScalar val;
1944           for (j=0;j<size_of_constraint;j++) {
1945             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1946             if (!PetscBTLookup(is_primal,row)) {
1947               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1948               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1949               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1950             } else {
1951               for (k=0;k<size_of_constraint;k++) {
1952                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1953                 if (row != col) {
1954                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1955                 } else {
1956                   val = 1.0;
1957                 }
1958                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1959               }
1960             }
1961           }
1962         }
1963         /* increment primal counter */
1964         primal_counter += primal_dofs;
1965       } else {
1966         if (pcbddc->dbg_flag) {
1967           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1968         }
1969       }
1970       /* increment constraint counter total_counts */
1971       total_counts += primal_dofs;
1972     }
1973 
1974     /* free workspace */
1975     if (qr_needed) {
1976       if (pcbddc->dbg_flag) {
1977         ierr = PetscFree(work);CHKERRQ(ierr);
1978       }
1979       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1980       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1981       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1982       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1983       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1984     }
1985     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1986     /* assembling */
1987     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1989     /*
1990     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1991     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1992     */
1993   }
1994 
1995   /* flush dbg viewer */
1996   if (pcbddc->dbg_flag) {
1997     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1998   }
1999 
2000   /* free workspace */
2001   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2002   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2003   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2004   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2005   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2006   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2007   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2008   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2009   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2010   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2016 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2017 {
2018   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2019   PC_IS       *pcis = (PC_IS*)pc->data;
2020   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2021   PetscInt    bs,ierr,i,vertex_size;
2022   PetscViewer viewer=pcbddc->dbg_viewer;
2023 
2024   PetscFunctionBegin;
2025   /* Init local Graph struct */
2026   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2027 
2028   /* Check validity of the csr graph passed in by the user */
2029   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2030     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2031   }
2032 
2033   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2034   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2035     Mat mat_adj;
2036     const PetscInt *xadj,*adjncy;
2037     PetscBool flg_row=PETSC_TRUE;
2038 
2039     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2040     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2041     if (!flg_row) {
2042       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2043     }
2044     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2045     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2046     if (!flg_row) {
2047       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2048     }
2049     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2050   }
2051 
2052   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2053   vertex_size = 1;
2054   if (!pcbddc->user_provided_isfordofs) {
2055     if (!pcbddc->n_ISForDofs) {
2056       IS *custom_ISForDofs;
2057 
2058       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2059       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2060       for (i=0;i<bs;i++) {
2061         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2062       }
2063       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2064       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2065       /* remove my references to IS objects */
2066       for (i=0;i<bs;i++) {
2067         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2068       }
2069       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2070     }
2071   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2072     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2073   }
2074 
2075   /* Setup of Graph */
2076   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2077 
2078   /* Graph's connected components analysis */
2079   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2080 
2081   /* print some info to stdout */
2082   if (pcbddc->dbg_flag) {
2083     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2090 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2091 {
2092   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2093   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097   n = 0;
2098   vertices = 0;
2099   if (pcbddc->ConstraintMatrix) {
2100     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2101     for (i=0;i<local_primal_size;i++) {
2102       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2103       if (size_of_constraint == 1) n++;
2104       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2105     }
2106     if (vertices_idx) {
2107       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2108       n = 0;
2109       for (i=0;i<local_primal_size;i++) {
2110         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2111         if (size_of_constraint == 1) {
2112           vertices[n++]=row_cmat_indices[0];
2113         }
2114         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2115       }
2116     }
2117   }
2118   *n_vertices = n;
2119   if (vertices_idx) *vertices_idx = vertices;
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2125 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2126 {
2127   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2128   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2129   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2130   PetscBT        touched;
2131   PetscErrorCode ierr;
2132 
2133     /* This function assumes that the number of local constraints per connected component
2134        is not greater than the number of nodes defined for the connected component
2135        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2136   PetscFunctionBegin;
2137   n = 0;
2138   constraints_index = 0;
2139   if (pcbddc->ConstraintMatrix) {
2140     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2141     max_size_of_constraint = 0;
2142     for (i=0;i<local_primal_size;i++) {
2143       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2144       if (size_of_constraint > 1) {
2145         n++;
2146       }
2147       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2148       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2149     }
2150     if (constraints_idx) {
2151       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2152       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2153       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2154       n = 0;
2155       for (i=0;i<local_primal_size;i++) {
2156         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2157         if (size_of_constraint > 1) {
2158           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2159           /* find first untouched local node */
2160           j = 0;
2161           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2162           min_index = row_cmat_global_indices[j];
2163           min_loc = j;
2164           /* search the minimum among nodes not yet touched on the connected component
2165              since there can be more than one constraint on a single cc */
2166           for (j=1;j<size_of_constraint;j++) {
2167             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2168               min_index = row_cmat_global_indices[j];
2169               min_loc = j;
2170             }
2171           }
2172           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2173           constraints_index[n++] = row_cmat_indices[min_loc];
2174         }
2175         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2176       }
2177       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2178       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2179     }
2180   }
2181   *n_constraints = n;
2182   if (constraints_idx) *constraints_idx = constraints_index;
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 /* the next two functions has been adapted from pcis.c */
2187 #undef __FUNCT__
2188 #define __FUNCT__ "PCBDDCApplySchur"
2189 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2190 {
2191   PetscErrorCode ierr;
2192   PC_IS          *pcis = (PC_IS*)(pc->data);
2193 
2194   PetscFunctionBegin;
2195   if (!vec2_B) { vec2_B = v; }
2196   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2197   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2198   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2199   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2200   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2206 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2207 {
2208   PetscErrorCode ierr;
2209   PC_IS          *pcis = (PC_IS*)(pc->data);
2210 
2211   PetscFunctionBegin;
2212   if (!vec2_B) { vec2_B = v; }
2213   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2214   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2215   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2216   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2217   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "PCBDDCSubsetNumbering"
2223 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2224 {
2225   Vec            local_vec,global_vec;
2226   IS             seqis,paris;
2227   VecScatter     scatter_ctx;
2228   PetscScalar    *array;
2229   PetscInt       *temp_global_dofs;
2230   PetscScalar    globalsum;
2231   PetscInt       i,j,s;
2232   PetscInt       nlocals,first_index,old_index,max_local;
2233   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2234   PetscMPIInt    *dof_sizes,*dof_displs;
2235   PetscBool      first_found;
2236   PetscErrorCode ierr;
2237 
2238   PetscFunctionBegin;
2239   /* mpi buffers */
2240   MPI_Comm_size(comm,&size_prec_comm);
2241   MPI_Comm_rank(comm,&rank_prec_comm);
2242   j = ( !rank_prec_comm ? size_prec_comm : 0);
2243   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2244   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2245   /* get maximum size of subset */
2246   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2247   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2248   max_local = 0;
2249   if (n_local_dofs) {
2250     max_local = temp_global_dofs[0];
2251     for (i=1;i<n_local_dofs;i++) {
2252       if (max_local < temp_global_dofs[i] ) {
2253         max_local = temp_global_dofs[i];
2254       }
2255     }
2256   }
2257   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2258   max_global++;
2259   max_local = 0;
2260   if (n_local_dofs) {
2261     max_local = local_dofs[0];
2262     for (i=1;i<n_local_dofs;i++) {
2263       if (max_local < local_dofs[i] ) {
2264         max_local = local_dofs[i];
2265       }
2266     }
2267   }
2268   max_local++;
2269   /* allocate workspace */
2270   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2271   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2272   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2273   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2274   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2275   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2276   /* create scatter */
2277   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2278   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2279   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2280   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2281   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2282   /* init array */
2283   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2284   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2285   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2286   if (local_dofs_mult) {
2287     for (i=0;i<n_local_dofs;i++) {
2288       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2289     }
2290   } else {
2291     for (i=0;i<n_local_dofs;i++) {
2292       array[local_dofs[i]]=1.0;
2293     }
2294   }
2295   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2296   /* scatter into global vec and get total number of global dofs */
2297   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2298   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2299   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2300   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2301   /* Fill global_vec with cumulative function for global numbering */
2302   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2303   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2304   nlocals = 0;
2305   first_index = -1;
2306   first_found = PETSC_FALSE;
2307   for (i=0;i<s;i++) {
2308     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2309       first_found = PETSC_TRUE;
2310       first_index = i;
2311     }
2312     nlocals += (PetscInt)PetscRealPart(array[i]);
2313   }
2314   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2315   if (!rank_prec_comm) {
2316     dof_displs[0]=0;
2317     for (i=1;i<size_prec_comm;i++) {
2318       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2319     }
2320   }
2321   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2322   if (first_found) {
2323     array[first_index] += (PetscScalar)nlocals;
2324     old_index = first_index;
2325     for (i=first_index+1;i<s;i++) {
2326       if (PetscRealPart(array[i]) > 0.0) {
2327         array[i] += array[old_index];
2328         old_index = i;
2329       }
2330     }
2331   }
2332   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2333   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2334   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2335   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2336   /* get global ordering of local dofs */
2337   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2338   if (local_dofs_mult) {
2339     for (i=0;i<n_local_dofs;i++) {
2340       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2341     }
2342   } else {
2343     for (i=0;i<n_local_dofs;i++) {
2344       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2345     }
2346   }
2347   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2348   /* free workspace */
2349   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2350   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2351   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2352   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2353   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2354   /* return pointer to global ordering of local dofs */
2355   *global_numbering_subset = temp_global_dofs;
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2361 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2362 {
2363   PetscInt       i,j;
2364   PetscScalar    *alphas;
2365   PetscErrorCode ierr;
2366 
2367   PetscFunctionBegin;
2368   /* this implements stabilized Gram-Schmidt */
2369   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2370   for (i=0;i<n;i++) {
2371     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2372     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2373     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2374   }
2375   ierr = PetscFree(alphas);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatConvert_IS_AIJ"
2382 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2383 {
2384   Mat new_mat;
2385   Mat_IS *matis = (Mat_IS*)(mat->data);
2386   /* info on mat */
2387   PetscInt bs,rows,cols;
2388   PetscInt lrows,lcols;
2389   PetscInt local_rows,local_cols;
2390   PetscMPIInt nsubdomains;
2391   /* preallocation */
2392   Vec vec_dnz,vec_onz;
2393   PetscScalar *my_dnz,*my_onz,*array;
2394   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2395   PetscInt  index_row,index_col,owner;
2396   PetscInt  *local_indices,*global_indices;
2397   /* work */
2398   PetscInt i,j;
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   /* CHECKS */
2403   /* get info from mat */
2404   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2405   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2406   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2407   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2408 
2409   /* MAT_INITIAL_MATRIX */
2410   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2411   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2412   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2413   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2414   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2415   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2416 
2417   /*
2418     preallocation
2419   */
2420 
2421   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2422   /*
2423      Some vectors are needed to sum up properly on shared interface dofs.
2424      Note that preallocation is not exact, since it overestimates nonzeros
2425   */
2426 /*
2427   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2428   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2429   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2430   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2431 */
2432   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2433   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2434   /* All processes need to compute entire row ownership */
2435   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2436   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2437   for (i=0;i<nsubdomains;i++) {
2438     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2439       row_ownership[j]=i;
2440     }
2441   }
2442   /* map indices of local mat to global values */
2443   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2444   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2445   for (i=0;i<local_rows;i++) local_indices[i]=i;
2446   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2447   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2448 
2449   /*
2450      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2451      then, they will be summed up properly. This way, preallocation is always sufficient
2452   */
2453   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2454   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2455   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2456   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2457   for (i=0;i<local_rows;i++) {
2458     index_row = global_indices[i];
2459     for (j=i;j<local_rows;j++) {
2460       owner = row_ownership[index_row];
2461       index_col = global_indices[j];
2462       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2463         my_dnz[i] += 1.0;
2464       } else { /* offdiag block */
2465         my_onz[i] += 1.0;
2466       }
2467       /* same as before, interchanging rows and cols */
2468       if (i != j) {
2469         owner = row_ownership[index_col];
2470         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2471           my_dnz[j] += 1.0;
2472         } else {
2473           my_onz[j] += 1.0;
2474         }
2475       }
2476     }
2477   }
2478   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2479   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2480   if (local_rows) { /* multilevel guard */
2481     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2482     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2483   }
2484   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2485   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2486   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2487   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2488   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2489   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2490   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2491 
2492   /* set computed preallocation in dnz and onz */
2493   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2494   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2495   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2496   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2497   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2498   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2499   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2500   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2501 
2502   /* Resize preallocation if overestimated */
2503   for (i=0;i<lrows;i++) {
2504     dnz[i] = PetscMin(dnz[i],lcols);
2505     onz[i] = PetscMin(onz[i],cols-lcols);
2506   }
2507   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2508   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2509 
2510   /*
2511     Set values. Very Basic.
2512   */
2513   for (i=0;i<local_rows;i++) {
2514     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2515     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2516     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2517     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2518     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2519   }
2520   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2521   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2522   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2523 
2524   /* get back mat */
2525   *M = new_mat;
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 #undef __FUNCT__
2530 #define __FUNCT__ "MatISSubassemble_Private"
2531 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2532 {
2533   Mat             subdomain_adj;
2534   IS              new_ranks,ranks_send_to;
2535   MatPartitioning partitioner;
2536   Mat_IS          *matis;
2537   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2538   PetscInt        prank;
2539   PetscMPIInt     size,rank,color;
2540   PetscInt        *xadj,*adjncy,*oldranks;
2541   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2542   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2543   PetscErrorCode  ierr;
2544   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2545   PetscSubcomm    subcomm;
2546 
2547   PetscFunctionBegin;
2548   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2549   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2550   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2551 
2552   /* Get info on mapping */
2553   matis = (Mat_IS*)(mat->data);
2554   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2555   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2556 
2557   /* build local CSR graph of subdomains' connectivity */
2558   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2559   xadj[0] = 0;
2560   xadj[1] = PetscMax(n_neighs-1,0);
2561   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2562   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2563 
2564   if (threshold) {
2565     PetscInt* count,min_threshold;
2566     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2567     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2568     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2569       for (j=0;j<n_shared[i];j++) {
2570         count[shared[i][j]] += 1;
2571       }
2572     }
2573     /* adapt threshold since we dont want to lose significant connections */
2574     min_threshold = n_neighs;
2575     for (i=1;i<n_neighs;i++) {
2576       for (j=0;j<n_shared[i];j++) {
2577         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2578       }
2579     }
2580     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2581     threshold = PetscMax(min_threshold+1,threshold);
2582     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2583     xadj[1] = 0;
2584     for (i=1;i<n_neighs;i++) {
2585       for (j=0;j<n_shared[i];j++) {
2586         if (count[shared[i][j]] < threshold) {
2587           adjncy[xadj[1]] = neighs[i];
2588           adjncy_wgt[xadj[1]] = n_shared[i];
2589           xadj[1]++;
2590           break;
2591         }
2592       }
2593     }
2594     ierr = PetscFree(count);CHKERRQ(ierr);
2595   } else {
2596     if (xadj[1]) {
2597       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2598       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2599     }
2600   }
2601   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2602   if (use_square) {
2603     for (i=0;i<xadj[1];i++) {
2604       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2605     }
2606   }
2607   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2608 
2609   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2610 
2611   /*
2612     Restrict work on active processes only.
2613   */
2614   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2615   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2616   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2617   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2618   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2619   if (color) {
2620     ierr = PetscFree(xadj);CHKERRQ(ierr);
2621     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2622     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2623   } else {
2624     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2625     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2626     prank = rank;
2627     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2628     for (i=0;i<size;i++) {
2629       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2630     }
2631     for (i=0;i<xadj[1];i++) {
2632       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2633     }
2634     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2635     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2636     n_subdomains = (PetscInt)size;
2637     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2638 
2639     /* Partition */
2640     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2641     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2642     if (use_vwgt) {
2643       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2644       v_wgt[0] = local_size;
2645       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2646     }
2647     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2648     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2649     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2650     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2651     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2652 
2653     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2654     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2655     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2656     /* clean up */
2657     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2658     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2659     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2660     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2661   }
2662   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2663 
2664   /* assemble parallel IS for sends */
2665   i = 1;
2666   if (color) i=0;
2667   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2668   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2669 
2670   /* get back IS */
2671   *is_sends = ranks_send_to;
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatISSubassemble"
2679 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2680 {
2681   Mat                    local_mat,new_mat;
2682   Mat_IS                 *matis;
2683   IS                     is_sends_internal;
2684   PetscInt               rows,cols;
2685   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2686   PetscBool              ismatis,isdense;
2687   ISLocalToGlobalMapping l2gmap;
2688   PetscInt*              l2gmap_indices;
2689   const PetscInt*        is_indices;
2690   MatType                new_local_type;
2691   MatTypePrivate         new_local_type_private;
2692   /* buffers */
2693   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2694   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2695   /* MPI */
2696   MPI_Comm               comm;
2697   PetscMPIInt            n_sends,n_recvs,commsize;
2698   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2699   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2700   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2701   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2702   PetscErrorCode         ierr;
2703 
2704   PetscFunctionBegin;
2705   /* checks */
2706   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2707   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2708   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2709   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2710   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2711   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2712   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2713   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2714   PetscValidLogicalCollectiveInt(mat,bs,0);
2715   /* prepare IS for sending if not provided */
2716   if (!is_sends) {
2717     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2718     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2719   } else {
2720     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2721     is_sends_internal = is_sends;
2722   }
2723 
2724   /* get pointer of MATIS data */
2725   matis = (Mat_IS*)mat->data;
2726 
2727   /* get comm */
2728   comm = PetscObjectComm((PetscObject)mat);
2729 
2730   /* compute number of sends */
2731   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2732   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2733 
2734   /* compute number of receives */
2735   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2736   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2737   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2738   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2739   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2740   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2741   ierr = PetscFree(iflags);CHKERRQ(ierr);
2742 
2743   /* prepare send/receive buffers */
2744   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2745   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2746   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2747   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2748 
2749   /* Get data from local mat */
2750   if (!isdense) {
2751     /* TODO: See below some guidelines on how to prepare the local buffers */
2752     /*
2753        send_buffer_vals should contain the raw values of the local matrix
2754        send_buffer_idxs should contain:
2755        - MatType_PRIVATE type
2756        - PetscInt        size_of_l2gmap
2757        - PetscInt        global_row_indices[size_of_l2gmap]
2758        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2759     */
2760   } else {
2761     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2762     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2763     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2764     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2765     send_buffer_idxs[1] = i;
2766     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2767     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2768     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2769     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2770     for (i=0;i<n_sends;i++) {
2771       ilengths_vals[is_indices[i]] = len*len;
2772       ilengths_idxs[is_indices[i]] = len+2;
2773     }
2774   }
2775   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2776   buf_size_idxs = 0;
2777   buf_size_vals = 0;
2778   for (i=0;i<n_recvs;i++) {
2779     buf_size_idxs += (PetscInt)olengths_idxs[i];
2780     buf_size_vals += (PetscInt)olengths_vals[i];
2781   }
2782   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2783   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2784 
2785   /* get new tags for clean communications */
2786   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2787   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2788 
2789   /* allocate for requests */
2790   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2791   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2792   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2793   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2794 
2795   /* communications */
2796   ptr_idxs = recv_buffer_idxs;
2797   ptr_vals = recv_buffer_vals;
2798   for (i=0;i<n_recvs;i++) {
2799     source_dest = onodes[i];
2800     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2801     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2802     ptr_idxs += olengths_idxs[i];
2803     ptr_vals += olengths_vals[i];
2804   }
2805   for (i=0;i<n_sends;i++) {
2806     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2807     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2808     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2809   }
2810   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2811   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2812 
2813   /* assemble new l2g map */
2814   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2815   ptr_idxs = recv_buffer_idxs;
2816   buf_size_idxs = 0;
2817   for (i=0;i<n_recvs;i++) {
2818     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2819     ptr_idxs += olengths_idxs[i];
2820   }
2821   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2822   ptr_idxs = recv_buffer_idxs;
2823   buf_size_idxs = 0;
2824   for (i=0;i<n_recvs;i++) {
2825     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2826     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2827     ptr_idxs += olengths_idxs[i];
2828   }
2829   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2830   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2831   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2832 
2833   /* infer new local matrix type from received local matrices type */
2834   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2835   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2836   new_local_type_private = MATAIJ_PRIVATE;
2837   new_local_type = MATSEQAIJ;
2838   if (n_recvs) {
2839     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2840     ptr_idxs = recv_buffer_idxs;
2841     for (i=0;i<n_recvs;i++) {
2842       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2843         new_local_type_private = MATAIJ_PRIVATE;
2844         break;
2845       }
2846       ptr_idxs += olengths_idxs[i];
2847     }
2848     switch (new_local_type_private) {
2849       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2850         new_local_type = MATSEQAIJ;
2851         bs = 1;
2852         break;
2853       case MATAIJ_PRIVATE:
2854         new_local_type = MATSEQAIJ;
2855         bs = 1;
2856         break;
2857       case MATBAIJ_PRIVATE:
2858         new_local_type = MATSEQBAIJ;
2859         break;
2860       case MATSBAIJ_PRIVATE:
2861         new_local_type = MATSEQSBAIJ;
2862         break;
2863       default:
2864         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2865         break;
2866     }
2867   }
2868 
2869   /* create MATIS object */
2870   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2871   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2872   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2873   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2874   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2875   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2876 
2877   /* set values */
2878   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2879   ptr_vals = recv_buffer_vals;
2880   ptr_idxs = recv_buffer_idxs;
2881   for (i=0;i<n_recvs;i++) {
2882     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2883       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2884       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2885       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2886       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2887       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2888     }
2889     ptr_idxs += olengths_idxs[i];
2890     ptr_vals += olengths_vals[i];
2891   }
2892   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2893   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2894   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2895   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2896 
2897   { /* check */
2898     Vec       lvec,rvec;
2899     PetscReal infty_error;
2900 
2901     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2902     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2903     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2904     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2905     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2906     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2907     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2908     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2909     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2910   }
2911 
2912   /* free workspace */
2913   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2914   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2915   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2916   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2917   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2918   if (isdense) {
2919     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2920     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2921   } else {
2922     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2923   }
2924   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2925   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2926   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2927   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2928   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2929   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2930   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2931   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2932   ierr = PetscFree(onodes);CHKERRQ(ierr);
2933   /* get back new mat */
2934   *mat_n = new_mat;
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2940 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2941 {
2942   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2943   PC_IS                  *pcis = (PC_IS*)pc->data;
2944   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2945   MatNullSpace           CoarseNullSpace=NULL;
2946   ISLocalToGlobalMapping coarse_islg;
2947   IS                     coarse_is;
2948   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2949   PetscInt               im_active=-1,active_procs=-1;
2950   PC                     pc_temp;
2951   PCType                 coarse_pc_type;
2952   KSPType                coarse_ksp_type;
2953   PetscBool              multilevel_requested,multilevel_allowed;
2954   PetscBool              setsym,issym,isbddc,isnn;
2955   MatStructure           matstruct;
2956   PetscErrorCode         ierr;
2957 
2958   PetscFunctionBegin;
2959   /* Assign global numbering to coarse dofs */
2960   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2961 
2962   /* infer some info from user */
2963   issym = PETSC_FALSE;
2964   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2965   multilevel_allowed = PETSC_FALSE;
2966   multilevel_requested = PETSC_FALSE;
2967   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2968   if (multilevel_requested) {
2969     /* count "active processes" */
2970     im_active = !!(pcis->n);
2971     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2972     if (active_procs/pcbddc->coarsening_ratio < 2) {
2973       multilevel_allowed = PETSC_FALSE;
2974     } else {
2975       multilevel_allowed = PETSC_TRUE;
2976     }
2977   }
2978 
2979   /* set defaults for coarse KSP and PC */
2980   if (multilevel_allowed) {
2981     if (issym) {
2982       coarse_ksp_type = KSPRICHARDSON;
2983     } else {
2984       coarse_ksp_type = KSPCHEBYSHEV;
2985     }
2986     coarse_pc_type = PCBDDC;
2987   } else {
2988     coarse_ksp_type = KSPPREONLY;
2989     coarse_pc_type = PCREDUNDANT;
2990   }
2991 
2992   /* create the coarse KSP object only once with defaults */
2993   if (!pcbddc->coarse_ksp) {
2994     char prefix[256],str_level[3];
2995     size_t len;
2996     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2997     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2998     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2999     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3000     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3001     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3002     /* prefix */
3003     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3004     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3005     if (!pcbddc->current_level) {
3006       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3007       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3008     } else {
3009       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3010       if (pcbddc->current_level>1) len -= 2;
3011       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3012       *(prefix+len)='\0';
3013       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3014       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3015     }
3016     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3017   }
3018   /* allow user customization */
3019   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3020   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3021   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3022 
3023   /* get some info after set from options */
3024   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3025   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3026   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3027   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3028     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3029     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3030     isbddc = PETSC_FALSE;
3031   }
3032 
3033   /* propagate BDDC info to the next level */
3034   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3035   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3036   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3037 
3038   /* creates temporary MATIS object for coarse matrix */
3039   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3040   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3041   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3042   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3043   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3044   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3045   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3046   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3047   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3048   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3049 
3050   /* assemble coarse matrix */
3051   if (isbddc || isnn) {
3052     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3053   } else {
3054     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3055   }
3056   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3057 
3058   /* create local to global scatters for coarse problem */
3059   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3060   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3061   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3062 
3063   /* propagate symmetry info to coarse matrix */
3064   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3065 
3066   /* Compute coarse null space (special handling by BDDC only) */
3067   if (pcbddc->NullSpace) {
3068     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3069     if (isbddc) {
3070       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3071     } else {
3072       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3073     }
3074   }
3075 
3076   /* set operators */
3077   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3078   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3079 
3080   /* additional KSP customization */
3081   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3082   if (max_it < 5) {
3083     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3084   }
3085   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3086 
3087 
3088   /* print some info if requested */
3089   if (pcbddc->dbg_flag) {
3090     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3091     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3092     if (!multilevel_allowed) {
3093       if (multilevel_requested) {
3094         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3095       } else if (pcbddc->max_levels) {
3096         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3097       }
3098     }
3099     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3100     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3101   }
3102 
3103   /* setup coarse ksp */
3104   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3105   if (pcbddc->dbg_flag) {
3106     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3107     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3108     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3109     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3110   }
3111 
3112   /* Check coarse problem if requested */
3113   if (pcbddc->dbg_flag) {
3114     KSP       check_ksp;
3115     KSPType   check_ksp_type;
3116     PC        check_pc;
3117     Vec       check_vec;
3118     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3119     PetscInt  its;
3120     PetscBool ispreonly,compute;
3121 
3122     /* Create ksp object suitable for estimation of extreme eigenvalues */
3123     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3124     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3125     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3126     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3127     if (ispreonly) {
3128       check_ksp_type = KSPPREONLY;
3129       compute = PETSC_FALSE;
3130     } else {
3131       if (issym) check_ksp_type = KSPCG;
3132       else check_ksp_type = KSPGMRES;
3133       compute = PETSC_TRUE;
3134     }
3135     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3136     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3137     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3138     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3139     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3140     /* create random vec */
3141     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3142     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3143     if (CoarseNullSpace) {
3144       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3145     }
3146     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3147     /* solve coarse problem */
3148     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3149     if (CoarseNullSpace) {
3150       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3151     }
3152     /* check coarse problem residual error */
3153     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3154     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3155     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3156     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3157     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3158     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3159     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3160     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3161     /* get eigenvalue estimation if preonly has not been requested */
3162     if (!ispreonly) {
3163       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3164       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3165       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3166     }
3167     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3168     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3169   }
3170   /* free memory */
3171   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3172   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3178 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3179 {
3180   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3181   PC_IS*         pcis = (PC_IS*)pc->data;
3182   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3183   PetscInt       i,j,coarse_size;
3184   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3185   PetscErrorCode ierr;
3186 
3187   PetscFunctionBegin;
3188   /* get indices in local ordering for vertices and constraints */
3189   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3190   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3191   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3192   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3193   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3194   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3195   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3196 
3197   /* Compute global number of coarse dofs */
3198   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3199 
3200   /* check numbering */
3201   if (pcbddc->dbg_flag) {
3202     PetscScalar coarsesum,*array;
3203 
3204     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3205     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3206     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3207     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3208     for (i=0;i<pcbddc->local_primal_size;i++) {
3209       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3210     }
3211     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3212     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3213     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3214     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3215     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3216     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3217     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3218     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3219     for (i=0;i<pcis->n;i++) {
3220       if (array[i] == 1.0) {
3221         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3222       }
3223     }
3224     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3225     for (i=0;i<pcis->n;i++) {
3226       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3227     }
3228     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3229     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3230     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3231     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3232     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3233     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3234     if (pcbddc->dbg_flag > 1) {
3235       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3236       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3237       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3238       for (i=0;i<pcbddc->local_primal_size;i++) {
3239         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3240       }
3241       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3242     }
3243     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3244   }
3245   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3246   /* get back data */
3247   *coarse_size_n = coarse_size;
3248   *local_primal_indices_n = local_primal_indices;
3249   PetscFunctionReturn(0);
3250 }
3251 
3252