xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 5e8657ed170075d82e3119957ba531ffe97f1ad9)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "PCBDDCResetSolvers"
84 PetscErrorCode PCBDDCResetSolvers(PC pc)
85 {
86   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
87   PetscErrorCode ierr;
88 
89   PetscFunctionBegin;
90   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
91   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
112   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
113   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
119 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
120 {
121   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
122   PC_IS          *pcis = (PC_IS*)pc->data;
123   VecType        impVecType;
124   PetscInt       n_constraints,n_R,old_size;
125   PetscErrorCode ierr;
126 
127   PetscFunctionBegin;
128   if (!pcbddc->ConstraintMatrix) {
129     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
130   }
131   /* get sizes */
132   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
133   n_R = pcis->n-pcbddc->n_actual_vertices;
134   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
135   /* local work vectors (try to avoid unneeded work)*/
136   /* R nodes */
137   old_size = -1;
138   if (pcbddc->vec1_R) {
139     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
140   }
141   if (n_R != old_size) {
142     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
144     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
146     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
147     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
148   }
149   /* local primal dofs */
150   old_size = -1;
151   if (pcbddc->vec1_P) {
152     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
153   }
154   if (pcbddc->local_primal_size != old_size) {
155     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
157     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
158     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
159   }
160   /* local explicit constraints */
161   old_size = -1;
162   if (pcbddc->vec1_C) {
163     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
164   }
165   if (n_constraints && n_constraints != old_size) {
166     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
168     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
169     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "PCBDDCSetUpCorrection"
176 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
177 {
178   PetscErrorCode         ierr;
179   /* pointers to pcis and pcbddc */
180   PC_IS*                 pcis = (PC_IS*)pc->data;
181   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
182   /* submatrices of local problem */
183   Mat                    A_RV,A_VR,A_VV;
184   /* working matrices */
185   Mat                    M1,M2,M3,C_CR;
186   /* working vectors */
187   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
188   /* additional working stuff */
189   IS                     is_aux;
190   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
191   const PetscScalar      *array,*row_cmat_values;
192   const PetscInt         *row_cmat_indices,*idx_R_local;
193   PetscInt               *idx_V_B,*auxindices;
194   PetscInt               n_vertices,n_constraints,size_of_constraint;
195   PetscInt               i,j,n_R,n_D,n_B;
196   PetscBool              unsymmetric_check;
197   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
198   MatType                impMatType;
199   /* some shortcuts to scalars */
200   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
201   /* for debugging purposes */
202   PetscReal              *coarsefunctions_errors,*constraints_errors;
203 
204   PetscFunctionBegin;
205   /* get number of vertices (corners plus constraints with change of basis)
206      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
207   n_vertices = pcbddc->n_actual_vertices;
208   n_constraints = pcbddc->local_primal_size-n_vertices;
209   /* Set Non-overlapping dimensions */
210   n_B = pcis->n_B; n_D = pcis->n - n_B;
211   n_R = pcis->n-n_vertices;
212 
213   /* Set types for local objects needed by BDDC precondtioner */
214   impMatType = MATSEQDENSE;
215 
216   /* Allocating some extra storage just to be safe */
217   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
218   for (i=0;i<pcis->n;i++) auxindices[i]=i;
219 
220   /* vertices in boundary numbering */
221   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
222   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
223   if (i != n_vertices) {
224     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
225   }
226 
227   /* Precompute stuffs needed for preprocessing and application of BDDC*/
228   if (n_constraints) {
229     /* see if we can save some allocations */
230     if (pcbddc->local_auxmat2) {
231       PetscInt on_R,on_constraints;
232       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
233       if (on_R != n_R || on_constraints != n_constraints) {
234         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
235         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
236       }
237     }
238     /* work vectors */
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
240     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
241     /* auxiliary matrices */
242     if (!pcbddc->local_auxmat2) {
243       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
244       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
245       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
246       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
247     }
248 
249     /* Extract constraints on R nodes: C_{CR}  */
250     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
251     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
252     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
253 
254     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
255     for (i=0;i<n_constraints;i++) {
256       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
257       /* Get row of constraint matrix in R numbering */
258       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
260       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
262       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
263       /* Solve for row of constraint matrix in R numbering */
264       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
265       /* Set values in local_auxmat2 */
266       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
268       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269     }
270     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
273 
274     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
275     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
276     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
277     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
278     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
279     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
280     ierr = MatSetUp(M1);CHKERRQ(ierr);
281     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
282     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
283     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
284     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
285     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
286     ierr = MatDestroy(&M2);CHKERRQ(ierr);
287     ierr = MatDestroy(&M3);CHKERRQ(ierr);
288     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
289     if (!pcbddc->local_auxmat1) {
290       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     } else {
292       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     }
294   }
295 
296   /* Get submatrices from subdomain matrix */
297   if (n_vertices) {
298     PetscInt ibs,mbs;
299     PetscBool issbaij;
300     Mat newmat;
301 
302     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
303     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
304     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
305     if (ibs != mbs) { /* need to convert to SEQAIJ */
306       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
310       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
311     } else {
312       /* this is safe */
313       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
314       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
315       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
316         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
317         /* which of the two approaches is faster? */
318         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
320         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
322         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
323       } else {
324         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
325         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
326       }
327     }
328     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
329     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
330     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
331   }
332 
333   /* Matrix of coarse basis functions (local) */
334   if (pcbddc->coarse_phi_B) {
335     PetscInt on_B,on_primal;
336     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
337     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
338       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
339       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
340     }
341   }
342   if (pcbddc->coarse_phi_D) {
343     PetscInt on_D,on_primal;
344     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
345     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
346       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
347       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
348     }
349   }
350   if (!pcbddc->coarse_phi_B) {
351     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
352     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
353     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
354     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
355   }
356   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
357     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
358     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
359     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
360     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
361   }
362 
363   if (pcbddc->dbg_flag) {
364     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
365     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
366     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
367   }
368   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
369   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
370 
371   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
372 
373   /* vertices */
374   for (i=0;i<n_vertices;i++) {
375     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
376     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
377     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
378     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
379     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
380     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
381     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
382     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
384     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
385     if (n_constraints) {
386       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
387       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
388       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
389     }
390     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
391     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
392 
393     /* Set values in coarse basis function and subdomain part of coarse_mat */
394     /* coarse basis functions */
395     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
396     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
400     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
402     if (pcbddc->switch_static || pcbddc->dbg_flag) {
403       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
405       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
407       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408     }
409     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
410     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
412     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     if (n_constraints) {
414       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
417     }
418 
419     /* check */
420     if (pcbddc->dbg_flag) {
421       /* assemble subdomain vector on local nodes */
422       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
423       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       if (n_R) {
425         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
426       }
427       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
428       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
429       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
430       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
431       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
432       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
433       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
434       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
435       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       if (n_constraints) {
437         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
438         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
439         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
440       }
441       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
443       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
444       /* check saddle point solution */
445       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
447       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
448       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
449       /* shift by the identity matrix */
450       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
451       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
453       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
454     }
455   }
456 
457   /* constraints */
458   for (i=0;i<n_constraints;i++) {
459     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
460     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
461     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
462     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
463     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
464     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
465     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
466     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
467     if (n_vertices) {
468       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
469     }
470     /* Set values in coarse basis function and subdomain part of coarse_mat */
471     /* coarse basis functions */
472     j = i+n_vertices; /* don't touch this! */
473     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
474     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
477     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
478     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
481       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
482       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
483       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
484       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
485     }
486     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
487     if (n_vertices) {
488       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
489       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
490       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
491     }
492     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
493     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
494     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
495 
496     if (pcbddc->dbg_flag) {
497       /* assemble subdomain vector on nodes */
498       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
499       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
500       if (n_R) {
501         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
502       }
503       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
504       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
505       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
506       /* assemble subdomain vector of lagrange multipliers */
507       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
508       if (n_vertices) {
509         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
510         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
511         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
512       }
513       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
514       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
515       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
516       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
517       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
518       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
519       /* check saddle point solution */
520       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
521       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
522       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
523       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
524       /* shift by the identity matrix */
525       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
526       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
527       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
528       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
529     }
530   }
531   /* call assembling routines for local coarse basis */
532   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534   if (pcbddc->switch_static || pcbddc->dbg_flag) {
535     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
537   }
538 
539   /* compute other basis functions for non-symmetric problems */
540   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
541   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
542     PetscBool setsym;
543     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
544     if (!setsym) pcbddc->issym = PETSC_FALSE;
545   }
546 
547   if (!pcbddc->issym) {
548     if (!pcbddc->coarse_psi_B) {
549       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
550       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
551       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
552       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
553     }
554     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
555       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
556       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
557       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
558       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
559     }
560     for (i=0;i<pcbddc->local_primal_size;i++) {
561       if (n_constraints) {
562         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
563         for (j=0;j<n_constraints;j++) {
564           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
565         }
566         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
567         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
568       }
569       if (i<n_vertices) {
570         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
571         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
572         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
573         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
574         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
575         if (n_constraints) {
576           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
577         }
578       } else {
579         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
580       }
581       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
582       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
583       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
584       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
585       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
586       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
587       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
588       if (i<n_vertices) {
589         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
590       }
591       if (pcbddc->switch_static || pcbddc->dbg_flag) {
592         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
593         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
594         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
595         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
596         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
597       }
598 
599       if (pcbddc->dbg_flag) {
600         /* assemble subdomain vector on nodes */
601         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
602         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
603         if (n_R) {
604           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
605         }
606         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
607         if (i<n_vertices) {
608           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
609         }
610         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
611         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
612         /* assemble subdomain vector of lagrange multipliers */
613         for (j=0;j<pcbddc->local_primal_size;j++) {
614           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
615         }
616         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
617         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
618         /* check saddle point solution */
619         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
620         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
621         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
622         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
623         /* shift by the identity matrix */
624         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
625         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
626         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
627         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
628       }
629     }
630     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
631     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
632     if (pcbddc->switch_static || pcbddc->dbg_flag) {
633       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
635     }
636     unsymmetric_check = PETSC_TRUE;
637   } else { /* take references to already computed coarse basis */
638     unsymmetric_check = PETSC_FALSE;
639     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
640     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
641     if (pcbddc->coarse_phi_D) {
642       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
643       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
644     }
645   }
646   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
647   /* Checking coarse_sub_mat and coarse basis functios */
648   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
649   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
650   if (pcbddc->dbg_flag) {
651     Mat         coarse_sub_mat;
652     Mat         AUXMAT,TM1,TM2,TM3,TM4;
653     Mat         coarse_phi_D,coarse_phi_B;
654     Mat         coarse_psi_D,coarse_psi_B;
655     Mat         A_II,A_BB,A_IB,A_BI;
656     MatType     checkmattype=MATSEQAIJ;
657     PetscReal   real_value;
658 
659     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
660     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
661     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
662     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
663     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
664     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
665     if (unsymmetric_check) {
666       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
667       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
668     }
669     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
670 
671     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
672     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
673     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
674     if (unsymmetric_check) {
675       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
676       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
677       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
678       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
679       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
680       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
681       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
682       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
683       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
684       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
685       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
686       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
687     } else {
688       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
689       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
690       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
691       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
692       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
693       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
694       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
695       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
696     }
697     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
698     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
699     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
700     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
701     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
702     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
703     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
704     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
705     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
706     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
707     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
708     for (i=0;i<pcbddc->local_primal_size;i++) {
709       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
710     }
711     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
712     for (i=0;i<pcbddc->local_primal_size;i++) {
713       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
714     }
715     if (unsymmetric_check) {
716       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
717       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
718         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
719       }
720       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
721       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
722         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
723       }
724     }
725     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
727     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
730     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
731     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
732     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
733     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
734     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
735     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
736     if (unsymmetric_check) {
737       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
738       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
739     }
740     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
741     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
742     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
743     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
744   }
745   /* free memory */
746   if (n_vertices) {
747     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
748     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
749     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
750     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
751     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
752   }
753   if (n_constraints) {
754     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
755     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
756     ierr = MatDestroy(&M1);CHKERRQ(ierr);
757     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
758   }
759   ierr = PetscFree(auxindices);CHKERRQ(ierr);
760   /* get back data */
761   *coarse_submat_vals_n = coarse_submat_vals;
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
767 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
768 {
769   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
770   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
771   Mat            *change_mat_all;
772   IS             is_local,is_global;
773   PetscBool      sorted,isseqaij;
774   PetscInt       local_size,*idxs_perm;
775   PetscErrorCode ierr;
776 
777   PetscFunctionBegin;
778   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
779   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
780   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
781   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
782   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
783   ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
784   if (!sorted) {
785     const PetscInt *idxs;
786     PetscInt *idxs_sorted,i;
787 
788     ierr = PetscMalloc1(local_size,&idxs_perm);CHKERRQ(ierr);
789     ierr = PetscMalloc1(local_size,&idxs_sorted);CHKERRQ(ierr);
790     for (i=0;i<local_size;i++) {
791       idxs_perm[i] = i;
792     }
793     ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
794     ierr = PetscSortIntWithPermutation(local_size,idxs,idxs_perm);CHKERRQ(ierr);
795     for (i=0;i<local_size;i++) {
796       idxs_sorted[i] = idxs[idxs_perm[i]];
797     }
798     ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
799     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
800     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
801   }
802 
803   /* get change of basis on the whole set of local dofs */
804   ierr = MatGetSubMatrices(ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
805 
806   if (!sorted) {
807     Mat      new_mat;
808     IS       is_perm;
809     PetscInt *idxs,i;
810 
811     ierr = PetscMalloc1(local_size,&idxs);CHKERRQ(ierr);
812     for (i=0;i<local_size;i++) {
813       idxs[idxs_perm[i]] = i;
814     }
815     ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
816     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
817     ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
818     ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
819     ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
820     change_mat_all[0] = new_mat;
821     ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
822   }
823 
824   /* check */
825   if (pcbddc->dbg_flag) {
826     Vec       x,x_change;
827     PetscReal error;
828 
829     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
830     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
831     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
832     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834     ierr = MatMult(change_mat_all[0],matis->x,matis->y);CHKERRQ(ierr);
835     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
836     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
837     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
838     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
839     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
840     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
841     ierr = VecDestroy(&x);CHKERRQ(ierr);
842     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
843   }
844 
845   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
846   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
847   if (isseqaij) {
848     ierr = MatPtAP(matis->A,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
849   } else {
850     Mat work_mat;
851     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
852     ierr = MatPtAP(work_mat,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
853     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
854   }
855   /*
856   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
857   ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
858   */
859   ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
860   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
866 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
867 {
868   PC_IS*         pcis = (PC_IS*)(pc->data);
869   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
870   IS             is_aux1,is_aux2;
871   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
872   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
873   PetscInt       vbs,bs;
874   PetscBT        bitmask;
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   /*
879     No need to setup local scatters if
880       - primal space is unchanged
881         AND
882       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
883         AND
884       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
885   */
886   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
887     PetscFunctionReturn(0);
888   }
889   /* destroy old objects */
890   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
891   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
892   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
893   /* Set Non-overlapping dimensions */
894   n_B = pcis->n_B; n_D = pcis->n - n_B;
895   n_vertices = pcbddc->n_actual_vertices;
896   /* create auxiliary bitmask */
897   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
898   for (i=0;i<n_vertices;i++) {
899     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
900   }
901 
902   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
903   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
904   for (i=0, n_R=0; i<pcis->n; i++) {
905     if (!PetscBTLookup(bitmask,i)) {
906       idx_R_local[n_R] = i;
907       n_R++;
908     }
909   }
910 
911   /* Block code */
912   vbs = 1;
913   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
914   if (bs>1 && !(n_vertices%bs)) {
915     PetscBool is_blocked = PETSC_TRUE;
916     PetscInt  *vary;
917     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
918     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
919     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
920     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
921     for (i=0; i<n_vertices; i++) {
922       if (vary[i]!=0 && vary[i]!=bs) {
923         is_blocked = PETSC_FALSE;
924         break;
925       }
926     }
927     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
928       vbs = bs;
929       for (i=0;i<n_R/vbs;i++) {
930         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
931       }
932     }
933     ierr = PetscFree(vary);CHKERRQ(ierr);
934   }
935   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
936   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
937 
938   /* print some info if requested */
939   if (pcbddc->dbg_flag) {
940     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
941     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
942     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
943     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
944     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
947     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
948   }
949 
950   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
951   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
952   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
953   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
954   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
955   for (i=0; i<n_D; i++) {
956     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
957   }
958   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
959   for (i=0, j=0; i<n_R; i++) {
960     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
961       aux_array1[j++] = i;
962     }
963   }
964   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
965   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
966   for (i=0, j=0; i<n_B; i++) {
967     if (!PetscBTLookup(bitmask,is_indices[i])) {
968       aux_array2[j++] = i;
969     }
970   }
971   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
972   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
973   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
974   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
975   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
976 
977   if (pcbddc->switch_static || pcbddc->dbg_flag) {
978     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
979     for (i=0, j=0; i<n_R; i++) {
980       if (PetscBTLookup(bitmask,idx_R_local[i])) {
981         aux_array1[j++] = i;
982       }
983     }
984     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
985     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
986     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
987   }
988   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
989   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
996 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
997 {
998   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
999   PC_IS          *pcis = (PC_IS*)pc->data;
1000   PC             pc_temp;
1001   Mat            A_RR;
1002   MatReuse       reuse;
1003   PetscScalar    m_one = -1.0;
1004   PetscReal      value;
1005   PetscInt       n_D,n_R,ibs,mbs;
1006   PetscBool      use_exact,use_exact_reduced,issbaij;
1007   PetscErrorCode ierr;
1008   /* prefixes stuff */
1009   char           dir_prefix[256],neu_prefix[256],str_level[16];
1010   size_t         len;
1011 
1012   PetscFunctionBegin;
1013 
1014   /* compute prefixes */
1015   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1016   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1017   if (!pcbddc->current_level) {
1018     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1019     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1020     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1021     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1022   } else {
1023     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1024     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1025     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1026     len -= 15; /* remove "pc_bddc_coarse_" */
1027     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1028     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1029     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1030     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1031     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1032     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1033     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1034     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1035   }
1036 
1037   /* DIRICHLET PROBLEM */
1038   /* Matrix for Dirichlet problem is pcis->A_II */
1039   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1040   if (!pcbddc->ksp_D) { /* create object if not yet build */
1041     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1042     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1043     /* default */
1044     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1045     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1046     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1047     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1048     if (issbaij) {
1049       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1050     } else {
1051       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1052     }
1053     /* Allow user's customization */
1054     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1055     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1056   }
1057   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1058   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1059   if (!n_D) {
1060     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1061     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1062   }
1063   /* Set Up KSP for Dirichlet problem of BDDC */
1064   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1065   /* set ksp_D into pcis data */
1066   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1067   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1068   pcis->ksp_D = pcbddc->ksp_D;
1069 
1070   /* NEUMANN PROBLEM */
1071   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1072   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1073   if (pcbddc->ksp_R) { /* already created ksp */
1074     PetscInt nn_R;
1075     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1076     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1077     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1078     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1079       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1080       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1081       reuse = MAT_INITIAL_MATRIX;
1082     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1083       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1084         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1085         reuse = MAT_INITIAL_MATRIX;
1086       } else { /* safe to reuse the matrix */
1087         reuse = MAT_REUSE_MATRIX;
1088       }
1089     }
1090     /* last check */
1091     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1092       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1093       reuse = MAT_INITIAL_MATRIX;
1094     }
1095   } else { /* first time, so we need to create the matrix */
1096     reuse = MAT_INITIAL_MATRIX;
1097   }
1098   /* extract A_RR */
1099   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1100   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1101   if (ibs != mbs) {
1102     Mat newmat;
1103     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1104     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1105     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1106   } else {
1107     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1108   }
1109   if (!pcbddc->ksp_R) { /* create object if not present */
1110     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1111     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1112     /* default */
1113     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1114     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1115     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1116     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1117     if (issbaij) {
1118       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1119     } else {
1120       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1121     }
1122     /* Allow user's customization */
1123     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1124     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1125   }
1126   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1127   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1128   if (!n_R) {
1129     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1130     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1131   }
1132   /* Set Up KSP for Neumann problem of BDDC */
1133   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1134 
1135   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1136   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1137     /* Dirichlet */
1138     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1139     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1140     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1141     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1142     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1143     /* need to be adapted? */
1144     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1145     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1146     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1147     /* print info */
1148     if (pcbddc->dbg_flag) {
1149       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1150       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1151       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1152       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1153       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1154       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1155     }
1156     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1157       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1158     }
1159 
1160     /* Neumann */
1161     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1162     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1163     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1164     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1165     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1166     /* need to be adapted? */
1167     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1168     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1169     /* print info */
1170     if (pcbddc->dbg_flag) {
1171       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1172       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1173     }
1174     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1175       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1176     }
1177   }
1178   /* free Neumann problem's matrix */
1179   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1185 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1186 {
1187   PetscErrorCode ierr;
1188   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1189 
1190   PetscFunctionBegin;
1191   if (applytranspose) {
1192     if (pcbddc->local_auxmat1) {
1193       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1194       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1195     }
1196     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1197   } else {
1198     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1199     if (pcbddc->local_auxmat1) {
1200       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1201       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1202     }
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1208 #undef __FUNCT__
1209 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1210 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1211 {
1212   PetscErrorCode ierr;
1213   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1214   PC_IS*            pcis = (PC_IS*)  (pc->data);
1215   const PetscScalar zero = 0.0;
1216 
1217   PetscFunctionBegin;
1218   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1219   if (applytranspose) {
1220     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1221     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1222   } else {
1223     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1224     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1225   }
1226   /* start communications from local primal nodes to rhs of coarse solver */
1227   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1228   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1232   /* TODO remove null space when doing multilevel */
1233   if (pcbddc->coarse_ksp) {
1234     if (applytranspose) {
1235       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1236     } else {
1237       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1238     }
1239   }
1240 
1241   /* Local solution on R nodes */
1242   if (pcis->n) {
1243     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1244     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1245     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1246     if (pcbddc->switch_static) {
1247       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1248       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1249     }
1250     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1251     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1252     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254     if (pcbddc->switch_static) {
1255       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257     }
1258   }
1259 
1260   /* communications from coarse sol to local primal nodes */
1261   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1262   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1263 
1264   /* Sum contributions from two levels */
1265   if (applytranspose) {
1266     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1267     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1268   } else {
1269     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1270     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1276 #undef __FUNCT__
1277 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1278 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1279 {
1280   PetscErrorCode ierr;
1281   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1282   PetscScalar    *array,*array2;
1283   Vec            from,to;
1284 
1285   PetscFunctionBegin;
1286   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1287     from = pcbddc->coarse_vec;
1288     to = pcbddc->vec1_P;
1289     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1290       Vec tvec;
1291       PetscInt lsize;
1292       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1293       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1294       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1295       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1296       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1297       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1298       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1299     }
1300   } else { /* from local to global -> put data in coarse right hand side */
1301     from = pcbddc->vec1_P;
1302     to = pcbddc->coarse_vec;
1303   }
1304   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1310 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1311 {
1312   PetscErrorCode ierr;
1313   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1314   PetscScalar    *array,*array2;
1315   Vec            from,to;
1316 
1317   PetscFunctionBegin;
1318   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1319     from = pcbddc->coarse_vec;
1320     to = pcbddc->vec1_P;
1321   } else { /* from local to global -> put data in coarse right hand side */
1322     from = pcbddc->vec1_P;
1323     to = pcbddc->coarse_vec;
1324   }
1325   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1326   if (smode == SCATTER_FORWARD) {
1327     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1328       Vec tvec;
1329       PetscInt lsize;
1330       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1331       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1332       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1333       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1334       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1335       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1336       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1337     }
1338   }
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 /* uncomment for testing purposes */
1343 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1344 #undef __FUNCT__
1345 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1346 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1347 {
1348   PetscErrorCode    ierr;
1349   PC_IS*            pcis = (PC_IS*)(pc->data);
1350   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1351   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1352   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1353   MatType           impMatType=MATSEQAIJ;
1354   /* one and zero */
1355   PetscScalar       one=1.0,zero=0.0;
1356   /* space to store constraints and their local indices */
1357   PetscScalar       *temp_quadrature_constraint;
1358   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1359   /* iterators */
1360   PetscInt          i,j,k,total_counts,temp_start_ptr;
1361   /* stuff to store connected components stored in pcbddc->mat_graph */
1362   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1363   PetscInt          n_ISForFaces,n_ISForEdges;
1364   /* near null space stuff */
1365   MatNullSpace      nearnullsp;
1366   const Vec         *nearnullvecs;
1367   Vec               *localnearnullsp;
1368   PetscBool         nnsp_has_cnst;
1369   PetscInt          nnsp_size;
1370   PetscScalar       *array;
1371   /* BLAS integers */
1372   PetscBLASInt      lwork,lierr;
1373   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1374   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1375   /* LAPACK working arrays for SVD or POD */
1376   PetscBool         skip_lapack;
1377   PetscScalar       *work;
1378   PetscReal         *singular_vals;
1379 #if defined(PETSC_USE_COMPLEX)
1380   PetscReal         *rwork;
1381 #endif
1382 #if defined(PETSC_MISSING_LAPACK_GESVD)
1383   PetscBLASInt      Blas_one_2=1;
1384   PetscScalar       *temp_basis,*correlation_mat;
1385 #else
1386   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1387   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1388 #endif
1389   /* reuse */
1390   PetscInt          olocal_primal_size;
1391   PetscInt          *oprimal_indices_local_idxs;
1392   /* change of basis */
1393   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1394   PetscBool         boolforchange,qr_needed;
1395   PetscBT           touched,change_basis,qr_needed_idx;
1396   /* auxiliary stuff */
1397   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1398   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1399   PetscScalar       *temp_quadrature_constraint_work;
1400   /* some quantities */
1401   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1402   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1403 
1404 
1405   PetscFunctionBegin;
1406   /* Destroy Mat objects computed previously */
1407   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1408   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1409   /* Get index sets for faces, edges and vertices from graph */
1410   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1411   /* free unneeded index sets */
1412   if (!pcbddc->use_vertices) {
1413     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1414   }
1415   if (!pcbddc->use_edges) {
1416     for (i=0;i<n_ISForEdges;i++) {
1417       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1418     }
1419     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1420     n_ISForEdges = 0;
1421   }
1422   if (!pcbddc->use_faces) {
1423     for (i=0;i<n_ISForFaces;i++) {
1424       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1425     }
1426     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1427     n_ISForFaces = 0;
1428   }
1429   /* HACKS (the following two blocks of code) */
1430   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1431     pcbddc->use_change_of_basis = PETSC_TRUE;
1432     if (!ISForEdges) {
1433       pcbddc->use_change_on_faces = PETSC_TRUE;
1434     }
1435   }
1436   if (pcbddc->NullSpace) {
1437     /* use_change_of_basis should be consistent among processors */
1438     PetscBool tbool[2],gbool[2];
1439     tbool [0] = pcbddc->use_change_of_basis;
1440     tbool [1] = pcbddc->use_change_on_faces;
1441     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1442     pcbddc->use_change_of_basis = gbool[0];
1443     pcbddc->use_change_on_faces = gbool[1];
1444   }
1445   /* print some info */
1446   if (pcbddc->dbg_flag) {
1447     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1448     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1449     i = 0;
1450     if (ISForVertices) {
1451       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1452     }
1453     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1454     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1455     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1456     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1457   }
1458   /* check if near null space is attached to global mat */
1459   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1460   if (nearnullsp) {
1461     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1462     /* remove any stored info */
1463     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1464     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1465     /* store information for BDDC solver reuse */
1466     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1467     pcbddc->onearnullspace = nearnullsp;
1468     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1469     for (i=0;i<nnsp_size;i++) {
1470       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1471     }
1472   } else { /* if near null space is not provided BDDC uses constants by default */
1473     nnsp_size = 0;
1474     nnsp_has_cnst = PETSC_TRUE;
1475   }
1476   /* get max number of constraints on a single cc */
1477   max_constraints = nnsp_size;
1478   if (nnsp_has_cnst) max_constraints++;
1479 
1480   /*
1481        Evaluate maximum storage size needed by the procedure
1482        - temp_indices will contain start index of each constraint stored as follows
1483        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1484        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1485        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1486                                                                                                                                                          */
1487   total_counts = n_ISForFaces+n_ISForEdges;
1488   total_counts *= max_constraints;
1489   n_vertices = 0;
1490   if (ISForVertices) {
1491     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1492   }
1493   total_counts += n_vertices;
1494   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1495   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1496   total_counts = 0;
1497   max_size_of_constraint = 0;
1498   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1499     if (i<n_ISForEdges) {
1500       used_IS = &ISForEdges[i];
1501     } else {
1502       used_IS = &ISForFaces[i-n_ISForEdges];
1503     }
1504     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1505     total_counts += j;
1506     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1507   }
1508   total_counts *= max_constraints;
1509   total_counts += n_vertices;
1510   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1511   /* get local part of global near null space vectors */
1512   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1513   for (k=0;k<nnsp_size;k++) {
1514     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1515     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1516     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1517   }
1518 
1519   /* whether or not to skip lapack calls */
1520   skip_lapack = PETSC_TRUE;
1521   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1522 
1523   /* allocate some auxiliary stuff */
1524   if (!skip_lapack || pcbddc->use_qr_single) {
1525     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1526   }
1527 
1528   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1529   if (!skip_lapack) {
1530     PetscScalar temp_work;
1531 
1532 #if defined(PETSC_MISSING_LAPACK_GESVD)
1533     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1534     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1535     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1536     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1537 #if defined(PETSC_USE_COMPLEX)
1538     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1539 #endif
1540     /* now we evaluate the optimal workspace using query with lwork=-1 */
1541     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1542     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1543     lwork = -1;
1544     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1545 #if !defined(PETSC_USE_COMPLEX)
1546     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1547 #else
1548     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1549 #endif
1550     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1551     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1552 #else /* on missing GESVD */
1553     /* SVD */
1554     PetscInt max_n,min_n;
1555     max_n = max_size_of_constraint;
1556     min_n = max_constraints;
1557     if (max_size_of_constraint < max_constraints) {
1558       min_n = max_size_of_constraint;
1559       max_n = max_constraints;
1560     }
1561     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1562 #if defined(PETSC_USE_COMPLEX)
1563     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1564 #endif
1565     /* now we evaluate the optimal workspace using query with lwork=-1 */
1566     lwork = -1;
1567     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1568     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1569     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1570     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1571 #if !defined(PETSC_USE_COMPLEX)
1572     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1573 #else
1574     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1575 #endif
1576     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1577     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1578 #endif /* on missing GESVD */
1579     /* Allocate optimal workspace */
1580     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1581     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1582   }
1583   /* Now we can loop on constraining sets */
1584   total_counts = 0;
1585   temp_indices[0] = 0;
1586   /* vertices */
1587   if (ISForVertices) {
1588     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1589     if (nnsp_has_cnst) { /* consider all vertices */
1590       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1591       for (i=0;i<n_vertices;i++) {
1592         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1593         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1594         total_counts++;
1595       }
1596     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1597       PetscBool used_vertex;
1598       for (i=0;i<n_vertices;i++) {
1599         used_vertex = PETSC_FALSE;
1600         k = 0;
1601         while (!used_vertex && k<nnsp_size) {
1602           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1603           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1604             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1605             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1606             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1607             total_counts++;
1608             used_vertex = PETSC_TRUE;
1609           }
1610           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1611           k++;
1612         }
1613       }
1614     }
1615     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1616     n_vertices = total_counts;
1617   }
1618 
1619   /* edges and faces */
1620   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1621     if (ncc<n_ISForEdges) {
1622       used_IS = &ISForEdges[ncc];
1623       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1624     } else {
1625       used_IS = &ISForFaces[ncc-n_ISForEdges];
1626       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1627     }
1628     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1629     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1630     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1631     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1632     /* change of basis should not be performed on local periodic nodes */
1633     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1634     if (nnsp_has_cnst) {
1635       PetscScalar quad_value;
1636       temp_constraints++;
1637       if (!pcbddc->use_nnsp_true) {
1638         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1639       } else {
1640         quad_value = 1.0;
1641       }
1642       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1643       for (j=0;j<size_of_constraint;j++) {
1644         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1645       }
1646       /* sort by global ordering if using lapack subroutines */
1647       if (!skip_lapack || pcbddc->use_qr_single) {
1648         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1649         for (j=0;j<size_of_constraint;j++) {
1650           permutation[j]=j;
1651         }
1652         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1653         for (j=0;j<size_of_constraint;j++) {
1654           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1655           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1656         }
1657         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1658         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1659       }
1660       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1661       total_counts++;
1662     }
1663     for (k=0;k<nnsp_size;k++) {
1664       PetscReal real_value;
1665       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1666       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1667       for (j=0;j<size_of_constraint;j++) {
1668         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1669       }
1670       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1671       /* check if array is null on the connected component */
1672       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1673       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1674       if (real_value > 0.0) { /* keep indices and values */
1675         /* sort by global ordering if using lapack subroutines */
1676         if (!skip_lapack || pcbddc->use_qr_single) {
1677           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1678           for (j=0;j<size_of_constraint;j++) {
1679             permutation[j]=j;
1680           }
1681           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1682           for (j=0;j<size_of_constraint;j++) {
1683             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1684             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1685           }
1686           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1687           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1688         }
1689         temp_constraints++;
1690         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1691         total_counts++;
1692       }
1693     }
1694     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1695     valid_constraints = temp_constraints;
1696     if (!pcbddc->use_nnsp_true && temp_constraints) {
1697       if (temp_constraints == 1) { /* just normalize the constraint */
1698         PetscScalar norm;
1699         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1700         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1701         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1702         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1703       } else { /* perform SVD */
1704         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1705 
1706 #if defined(PETSC_MISSING_LAPACK_GESVD)
1707         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1708            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1709            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1710               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1711               from that computed using LAPACKgesvd
1712            -> This is due to a different computation of eigenvectors in LAPACKheev
1713            -> The quality of the POD-computed basis will be the same */
1714         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1715         /* Store upper triangular part of correlation matrix */
1716         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1717         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1718         for (j=0;j<temp_constraints;j++) {
1719           for (k=0;k<j+1;k++) {
1720             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1721           }
1722         }
1723         /* compute eigenvalues and eigenvectors of correlation matrix */
1724         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1725         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1726 #if !defined(PETSC_USE_COMPLEX)
1727         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1728 #else
1729         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1730 #endif
1731         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1732         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1733         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1734         j = 0;
1735         while (j < temp_constraints && singular_vals[j] < tol) j++;
1736         total_counts = total_counts-j;
1737         valid_constraints = temp_constraints-j;
1738         /* scale and copy POD basis into used quadrature memory */
1739         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1740         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1741         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1742         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1743         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1744         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1745         if (j<temp_constraints) {
1746           PetscInt ii;
1747           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1748           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1749           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1750           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1751           for (k=0;k<temp_constraints-j;k++) {
1752             for (ii=0;ii<size_of_constraint;ii++) {
1753               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1754             }
1755           }
1756         }
1757 #else  /* on missing GESVD */
1758         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1759         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1760         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1761         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1762 #if !defined(PETSC_USE_COMPLEX)
1763         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1764 #else
1765         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1766 #endif
1767         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1768         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1769         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1770         k = temp_constraints;
1771         if (k > size_of_constraint) k = size_of_constraint;
1772         j = 0;
1773         while (j < k && singular_vals[k-j-1] < tol) j++;
1774         valid_constraints = k-j;
1775         total_counts = total_counts-temp_constraints+valid_constraints;
1776 #endif /* on missing GESVD */
1777       }
1778     }
1779     /* setting change_of_basis flag is safe now */
1780     if (boolforchange) {
1781       for (j=0;j<valid_constraints;j++) {
1782         PetscBTSet(change_basis,total_counts-j-1);
1783       }
1784     }
1785   }
1786   /* free index sets of faces, edges and vertices */
1787   for (i=0;i<n_ISForFaces;i++) {
1788     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1789   }
1790   if (n_ISForFaces) {
1791     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1792   }
1793   for (i=0;i<n_ISForEdges;i++) {
1794     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1795   }
1796   if (n_ISForEdges) {
1797     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1798   }
1799   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1800   /* map temp_indices_to_constraint in boundary numbering */
1801   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1802   if (i != temp_indices[total_counts]) {
1803     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1804   }
1805 
1806   /* free workspace */
1807   if (!skip_lapack || pcbddc->use_qr_single) {
1808     ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1809   }
1810   if (!skip_lapack) {
1811     ierr = PetscFree(work);CHKERRQ(ierr);
1812 #if defined(PETSC_USE_COMPLEX)
1813     ierr = PetscFree(rwork);CHKERRQ(ierr);
1814 #endif
1815     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1816 #if defined(PETSC_MISSING_LAPACK_GESVD)
1817     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1818     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1819 #endif
1820   }
1821   for (k=0;k<nnsp_size;k++) {
1822     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1823   }
1824   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1825 
1826   /* set quantities in pcbddc data structure and store previous primal size */
1827   /* n_vertices defines the number of subdomain corners in the primal space */
1828   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1829   olocal_primal_size = pcbddc->local_primal_size;
1830   pcbddc->local_primal_size = total_counts;
1831   pcbddc->n_vertices = n_vertices;
1832   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1833 
1834   /* Create constraint matrix */
1835   /* The constraint matrix is used to compute the l2g map of primal dofs */
1836   /* so we need to set it up properly either with or without change of basis */
1837   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1838   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1839   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1840   /* array to compute a local numbering of constraints : vertices first then constraints */
1841   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1842   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1843   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1844   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1845   /* auxiliary stuff for basis change */
1846   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1847   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1848 
1849   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1850   total_primal_vertices=0;
1851   for (i=0;i<pcbddc->local_primal_size;i++) {
1852     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1853     if (size_of_constraint == 1) {
1854       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1855       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1856       aux_primal_minloc[total_primal_vertices]=0;
1857       total_primal_vertices++;
1858     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1859       PetscInt min_loc,min_index;
1860       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1861       /* find first untouched local node */
1862       k = 0;
1863       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1864       min_index = global_indices[k];
1865       min_loc = k;
1866       /* search the minimum among global nodes already untouched on the cc */
1867       for (k=1;k<size_of_constraint;k++) {
1868         /* there can be more than one constraint on a single connected component */
1869         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1870           min_index = global_indices[k];
1871           min_loc = k;
1872         }
1873       }
1874       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1875       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1876       aux_primal_minloc[total_primal_vertices]=min_loc;
1877       total_primal_vertices++;
1878     }
1879   }
1880   /* determine if a QR strategy is needed for change of basis */
1881   qr_needed = PETSC_FALSE;
1882   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1883   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1884     if (PetscBTLookup(change_basis,i)) {
1885       if (!pcbddc->use_qr_single) {
1886         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1887         j = 0;
1888         for (k=0;k<size_of_constraint;k++) {
1889           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1890             j++;
1891           }
1892         }
1893         /* found more than one primal dof on the cc */
1894         if (j > 1) {
1895           PetscBTSet(qr_needed_idx,i);
1896           qr_needed = PETSC_TRUE;
1897         }
1898       } else {
1899         PetscBTSet(qr_needed_idx,i);
1900         qr_needed = PETSC_TRUE;
1901       }
1902     }
1903   }
1904   /* free workspace */
1905   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1906 
1907   /* permute indices in order to have a sorted set of vertices */
1908   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1909 
1910   /* nonzero structure of constraint matrix */
1911   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1912   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1913   j=total_primal_vertices;
1914   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1915     if (!PetscBTLookup(change_basis,i)) {
1916       nnz[j]=temp_indices[i+1]-temp_indices[i];
1917       j++;
1918     }
1919   }
1920   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1921   ierr = PetscFree(nnz);CHKERRQ(ierr);
1922   /* set values in constraint matrix */
1923   for (i=0;i<total_primal_vertices;i++) {
1924     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1925   }
1926   total_counts = total_primal_vertices;
1927   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1928     if (!PetscBTLookup(change_basis,i)) {
1929       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1930       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1931       total_counts++;
1932     }
1933   }
1934   /* assembling */
1935   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1936   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1937   /*
1938   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1939   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1940   */
1941   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1942   if (pcbddc->use_change_of_basis) {
1943     /* dual and primal dofs on a single cc */
1944     PetscInt     dual_dofs,primal_dofs;
1945     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1946     PetscInt     primal_counter;
1947     /* working stuff for GEQRF */
1948     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1949     PetscBLASInt lqr_work;
1950     /* working stuff for UNGQR */
1951     PetscScalar  *gqr_work,lgqr_work_t;
1952     PetscBLASInt lgqr_work;
1953     /* working stuff for TRTRS */
1954     PetscScalar  *trs_rhs;
1955     PetscBLASInt Blas_NRHS;
1956     /* pointers for values insertion into change of basis matrix */
1957     PetscInt     *start_rows,*start_cols;
1958     PetscScalar  *start_vals;
1959     /* working stuff for values insertion */
1960     PetscBT      is_primal;
1961     /* matrix sizes */
1962     PetscInt     global_size,local_size;
1963     /* work array for nonzeros */
1964     PetscScalar  *nnz_array;
1965     /* temporary change of basis */
1966     Mat          localChangeOfBasisMatrix;
1967     /* auxiliary work for global change of basis */
1968     Vec          nnz_vec;
1969     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
1970     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
1971     PetscScalar  *vals;
1972     PetscBool    done;
1973 
1974     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
1975     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
1976     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1977     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1978 
1979     /* nonzeros for local mat */
1980     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1981     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1982     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1983       if (PetscBTLookup(change_basis,i)) {
1984         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1985         if (PetscBTLookup(qr_needed_idx,i)) {
1986           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1987         } else {
1988           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1989           /* get local primal index on the cc */
1990           j = 0;
1991           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1992           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1993         }
1994       }
1995     }
1996     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1997     /* Set initial identity in the matrix */
1998     for (i=0;i<pcis->n_B;i++) {
1999       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2000     }
2001 
2002     if (pcbddc->dbg_flag) {
2003       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2004       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2005     }
2006 
2007 
2008     /* Now we loop on the constraints which need a change of basis */
2009     /*
2010        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2011        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2012 
2013        Basic blocks of change of basis matrix T computed by
2014 
2015           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2016 
2017             | 1        0   ...        0         s_1/S |
2018             | 0        1   ...        0         s_2/S |
2019             |              ...                        |
2020             | 0        ...            1     s_{n-1}/S |
2021             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2022 
2023             with S = \sum_{i=1}^n s_i^2
2024             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2025                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2026 
2027           - QR decomposition of constraints otherwise
2028     */
2029     if (qr_needed) {
2030       /* space to store Q */
2031       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2032       /* first we issue queries for optimal work */
2033       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2034       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2035       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2036       lqr_work = -1;
2037       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2038       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2039       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2040       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2041       lgqr_work = -1;
2042       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2043       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2044       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2045       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2046       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2047       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2048       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2049       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2050       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2051       /* array to store scaling factors for reflectors */
2052       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2053       /* array to store rhs and solution of triangular solver */
2054       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2055       /* allocating workspace for check */
2056       if (pcbddc->dbg_flag) {
2057         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2058       }
2059     }
2060     /* array to store whether a node is primal or not */
2061     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2062     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2063     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2064     if (i != total_primal_vertices) {
2065       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2066     }
2067     for (i=0;i<total_primal_vertices;i++) {
2068       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2069     }
2070     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2071 
2072     /* loop on constraints and see whether or not they need a change of basis and compute it */
2073     /* -> using implicit ordering contained in temp_indices data */
2074     total_counts = pcbddc->n_vertices;
2075     primal_counter = total_counts;
2076     while (total_counts<pcbddc->local_primal_size) {
2077       primal_dofs = 1;
2078       if (PetscBTLookup(change_basis,total_counts)) {
2079         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2080         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2081           primal_dofs++;
2082         }
2083         /* get constraint info */
2084         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2085         dual_dofs = size_of_constraint-primal_dofs;
2086 
2087         if (pcbddc->dbg_flag) {
2088           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2089         }
2090 
2091         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2092 
2093           /* copy quadrature constraints for change of basis check */
2094           if (pcbddc->dbg_flag) {
2095             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2096           }
2097           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2098           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2099 
2100           /* compute QR decomposition of constraints */
2101           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2102           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2103           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2104           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2105           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2106           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2107           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2108 
2109           /* explictly compute R^-T */
2110           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2111           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2112           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2113           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2114           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2115           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2116           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2117           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2118           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2119           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2120 
2121           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2122           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2123           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2124           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2125           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2126           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2127           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2128           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2129           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2130 
2131           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2132              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2133              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2134           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2136           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2137           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2138           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2139           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2140           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2141           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2142           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2143           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2144 
2145           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2146           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2147           /* insert cols for primal dofs */
2148           for (j=0;j<primal_dofs;j++) {
2149             start_vals = &qr_basis[j*size_of_constraint];
2150             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2151             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2152           }
2153           /* insert cols for dual dofs */
2154           for (j=0,k=0;j<dual_dofs;k++) {
2155             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2156               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2157               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2158               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2159               j++;
2160             }
2161           }
2162 
2163           /* check change of basis */
2164           if (pcbddc->dbg_flag) {
2165             PetscInt   ii,jj;
2166             PetscBool valid_qr=PETSC_TRUE;
2167             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2168             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2169             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2170             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2171             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2172             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2173             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2174             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2175             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2176             for (jj=0;jj<size_of_constraint;jj++) {
2177               for (ii=0;ii<primal_dofs;ii++) {
2178                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2179                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2180               }
2181             }
2182             if (!valid_qr) {
2183               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2184               for (jj=0;jj<size_of_constraint;jj++) {
2185                 for (ii=0;ii<primal_dofs;ii++) {
2186                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2187                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2188                   }
2189                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2190                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2191                   }
2192                 }
2193               }
2194             } else {
2195               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2196             }
2197           }
2198         } else { /* simple transformation block */
2199           PetscInt    row,col;
2200           PetscScalar val,norm;
2201 
2202           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2203           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2204           for (j=0;j<size_of_constraint;j++) {
2205             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2206             if (!PetscBTLookup(is_primal,row)) {
2207               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2208               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2209               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2210             } else {
2211               for (k=0;k<size_of_constraint;k++) {
2212                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2213                 if (row != col) {
2214                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2215                 } else {
2216                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2217                 }
2218                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2219               }
2220             }
2221           }
2222           if (pcbddc->dbg_flag) {
2223             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2224           }
2225         }
2226         /* increment primal counter */
2227         primal_counter += primal_dofs;
2228       } else {
2229         if (pcbddc->dbg_flag) {
2230           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2231         }
2232       }
2233       /* increment constraint counter total_counts */
2234       total_counts += primal_dofs;
2235     }
2236 
2237     /* free workspace */
2238     if (qr_needed) {
2239       if (pcbddc->dbg_flag) {
2240         ierr = PetscFree(work);CHKERRQ(ierr);
2241       }
2242       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2243       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2244       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2245       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2246       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2247     }
2248     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2249     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251 
2252     /* assembling of global change of variable */
2253     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2254     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2255     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2256     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2257     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2258     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2259 
2260     /* nonzeros (overestimated) */
2261     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2262     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2263     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2264     for (i=0;i<pcis->n;i++) {
2265       nnz_array[i] = 1.0;
2266       idxs_all[i] = i;
2267     }
2268     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2269     for (i=0;i<pcis->n_B;i++) {
2270       nnz_array[idxs_B[i]] = nnz[i];
2271     }
2272     if (pcis->n) {
2273       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2274     }
2275     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2276     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2277     ierr = PetscFree(nnz);CHKERRQ(ierr);
2278     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2279     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2280     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2281     for (i=0;i<local_size;i++) {
2282       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2283       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2284     }
2285     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2286     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2287     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2288     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2289 
2290     /* Set identity on dirichlet dofs */
2291     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2292     for (i=0;i<pcis->n-pcis->n_B;i++) {
2293       PetscScalar one=1.0;
2294       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2295     }
2296     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2297 
2298     /* Set values at interface dofs */
2299     done = PETSC_TRUE;
2300     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2301     if (!done) {
2302       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2303     }
2304     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2305     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2306     ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2307     for (i=0;i<nvtxs;i++) {
2308       PetscInt    row,*cols,ncols;
2309       PetscScalar *mat_vals;
2310 
2311       row = idxs_B[i];
2312       ncols = xadj[i+1]-xadj[i];
2313       cols = idxs_mapped+xadj[i];
2314       mat_vals = vals+xadj[i];
2315       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2316     }
2317     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2318     if (!done) {
2319       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2320     }
2321     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2322     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2323     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2324     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2325     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326 
2327     /* check */
2328     if (pcbddc->dbg_flag) {
2329       PetscReal error;
2330       Vec       x,x_change;
2331 
2332       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2333       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2334       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2335       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2336       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2337       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2338       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2339       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2340       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2341       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2342       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2343       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2344       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2345       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2346       ierr = VecDestroy(&x);CHKERRQ(ierr);
2347       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2348     }
2349     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2350   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2351     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2352     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2353   }
2354 
2355   /* set up change of basis context */
2356   if (pcbddc->ChangeOfBasisMatrix) {
2357     PCBDDCChange_ctx change_ctx;
2358 
2359     if (!pcbddc->new_global_mat) {
2360       PetscInt global_size,local_size;
2361 
2362       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2363       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2364       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2365       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2366       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2367       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2368       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2369       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2370       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2371     } else {
2372       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2373       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2374       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2375     }
2376     if (!pcbddc->user_ChangeOfBasisMatrix) {
2377       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2378       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2379     } else {
2380       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2381       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2382     }
2383     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2384     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2385     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2386     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   }
2388 
2389   /* get indices in local ordering for vertices and constraints */
2390   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2391     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2392     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2393   }
2394   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2395   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2396   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2397   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2398   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2399   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2400   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2401   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2402   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2403   /* set quantities in PCBDDC data struct */
2404   pcbddc->n_actual_vertices = i;
2405   /* check if a new primal space has been introduced */
2406   pcbddc->new_primal_space_local = PETSC_TRUE;
2407   if (olocal_primal_size == pcbddc->local_primal_size) {
2408     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2409     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2410     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2411   }
2412   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2413   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2414 
2415   /* flush dbg viewer */
2416   if (pcbddc->dbg_flag) {
2417     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2418   }
2419 
2420   /* free workspace */
2421   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2422   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2423   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2424   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2425   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2426   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2432 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2433 {
2434   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2435   PC_IS       *pcis = (PC_IS*)pc->data;
2436   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2437   PetscInt    ierr,i,vertex_size;
2438   PetscViewer viewer=pcbddc->dbg_viewer;
2439 
2440   PetscFunctionBegin;
2441   /* Reset previously computed graph */
2442   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2443   /* Init local Graph struct */
2444   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2445 
2446   /* Check validity of the csr graph passed in by the user */
2447   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2448     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2449   }
2450 
2451   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2452   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2453     Mat       mat_adj;
2454     PetscInt  *xadj,*adjncy;
2455     PetscInt  nvtxs;
2456     PetscBool flg_row=PETSC_TRUE;
2457 
2458     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2459     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2460     if (!flg_row) {
2461       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2462     }
2463     if (pcbddc->use_local_adj) {
2464       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2465       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2466     } else { /* just compute subdomain's connected components */
2467       IS                     is_dummy;
2468       ISLocalToGlobalMapping l2gmap_dummy;
2469       PetscInt               j,sum;
2470       PetscInt               *cxadj,*cadjncy;
2471       const PetscInt         *idxs;
2472       PCBDDCGraph            graph;
2473       PetscBT                is_on_boundary;
2474 
2475       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2476       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2477       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2478       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2479       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2480       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2481       graph->xadj = xadj;
2482       graph->adjncy = adjncy;
2483       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2484       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2485 
2486       if (pcbddc->dbg_flag) {
2487         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2488         for (i=0;i<graph->ncc;i++) {
2489           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2490         }
2491         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2492       }
2493 
2494       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2495       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2496       for (i=0;i<pcis->n_B;i++) {
2497         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2498       }
2499       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2500 
2501       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2502       sum = 0;
2503       for (i=0;i<graph->ncc;i++) {
2504         PetscInt sizecc = 0;
2505         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2506           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2507             sizecc++;
2508           }
2509         }
2510         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2511           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2512             cxadj[graph->queue[j]] = sizecc;
2513           }
2514         }
2515         sum += sizecc*sizecc;
2516       }
2517       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2518       sum = 0;
2519       for (i=0;i<nvtxs;i++) {
2520         PetscInt temp = cxadj[i];
2521         cxadj[i] = sum;
2522         sum += temp;
2523       }
2524       cxadj[nvtxs] = sum;
2525       for (i=0;i<graph->ncc;i++) {
2526         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2527           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2528             PetscInt k,sizecc = 0;
2529             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2530               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2531                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2532                 sizecc++;
2533               }
2534             }
2535           }
2536         }
2537       }
2538       if (nvtxs) {
2539         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2540       } else {
2541         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2542         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2543       }
2544       graph->xadj = 0;
2545       graph->adjncy = 0;
2546       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2547       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2548     }
2549     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2550     if (!flg_row) {
2551       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2552     }
2553     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2554   }
2555 
2556   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2557   vertex_size = 1;
2558   if (pcbddc->user_provided_isfordofs) {
2559     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2560       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2561       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2562         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2563         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2564       }
2565       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2566       pcbddc->n_ISForDofs = 0;
2567       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2568     }
2569     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2570     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2571   } else {
2572     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2573       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2574       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2575       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2576         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2577       }
2578     }
2579   }
2580 
2581   /* Setup of Graph */
2582   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2583     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2584   }
2585   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2586     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2587   }
2588   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2589 
2590   /* Graph's connected components analysis */
2591   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2592 
2593   /* print some info to stdout */
2594   if (pcbddc->dbg_flag) {
2595     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2596   }
2597 
2598   /* mark topography has done */
2599   pcbddc->recompute_topography = PETSC_FALSE;
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2605 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2606 {
2607   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2608   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2609   PetscErrorCode ierr;
2610 
2611   PetscFunctionBegin;
2612   n = 0;
2613   vertices = 0;
2614   if (pcbddc->ConstraintMatrix) {
2615     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2616     for (i=0;i<local_primal_size;i++) {
2617       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2618       if (size_of_constraint == 1) n++;
2619       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2620     }
2621     if (vertices_idx) {
2622       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2623       n = 0;
2624       for (i=0;i<local_primal_size;i++) {
2625         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2626         if (size_of_constraint == 1) {
2627           vertices[n++]=row_cmat_indices[0];
2628         }
2629         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2630       }
2631     }
2632   }
2633   *n_vertices = n;
2634   if (vertices_idx) *vertices_idx = vertices;
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2640 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2641 {
2642   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2643   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2644   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2645   PetscBT        touched;
2646   PetscErrorCode ierr;
2647 
2648     /* This function assumes that the number of local constraints per connected component
2649        is not greater than the number of nodes defined for the connected component
2650        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2651   PetscFunctionBegin;
2652   n = 0;
2653   constraints_index = 0;
2654   if (pcbddc->ConstraintMatrix) {
2655     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2656     max_size_of_constraint = 0;
2657     for (i=0;i<local_primal_size;i++) {
2658       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2659       if (size_of_constraint > 1) {
2660         n++;
2661       }
2662       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2663       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2664     }
2665     if (constraints_idx) {
2666       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2667       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2668       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2669       n = 0;
2670       for (i=0;i<local_primal_size;i++) {
2671         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2672         if (size_of_constraint > 1) {
2673           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2674           /* find first untouched local node */
2675           j = 0;
2676           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2677           min_index = row_cmat_global_indices[j];
2678           min_loc = j;
2679           /* search the minimum among nodes not yet touched on the connected component
2680              since there can be more than one constraint on a single cc */
2681           for (j=1;j<size_of_constraint;j++) {
2682             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2683               min_index = row_cmat_global_indices[j];
2684               min_loc = j;
2685             }
2686           }
2687           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2688           constraints_index[n++] = row_cmat_indices[min_loc];
2689         }
2690         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2691       }
2692       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2693       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2694     }
2695   }
2696   *n_constraints = n;
2697   if (constraints_idx) *constraints_idx = constraints_index;
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 #undef __FUNCT__
2702 #define __FUNCT__ "PCBDDCSubsetNumbering"
2703 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2704 {
2705   Vec            local_vec,global_vec;
2706   IS             seqis,paris;
2707   VecScatter     scatter_ctx;
2708   PetscScalar    *array;
2709   PetscInt       *temp_global_dofs;
2710   PetscScalar    globalsum;
2711   PetscInt       i,j,s;
2712   PetscInt       nlocals,first_index,old_index,max_local;
2713   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2714   PetscMPIInt    *dof_sizes,*dof_displs;
2715   PetscBool      first_found;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   /* mpi buffers */
2720   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2721   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2722   j = ( !rank_prec_comm ? size_prec_comm : 0);
2723   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2724   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2725   /* get maximum size of subset */
2726   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2727   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2728   max_local = 0;
2729   for (i=0;i<n_local_dofs;i++) {
2730     if (max_local < temp_global_dofs[i] ) {
2731       max_local = temp_global_dofs[i];
2732     }
2733   }
2734   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2735   max_global++;
2736   max_local = 0;
2737   for (i=0;i<n_local_dofs;i++) {
2738     if (max_local < local_dofs[i] ) {
2739       max_local = local_dofs[i];
2740     }
2741   }
2742   max_local++;
2743   /* allocate workspace */
2744   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2745   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2746   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2747   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2748   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2749   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2750   /* create scatter */
2751   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2752   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2753   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2754   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2755   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2756   /* init array */
2757   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2758   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2759   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2760   if (local_dofs_mult) {
2761     for (i=0;i<n_local_dofs;i++) {
2762       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2763     }
2764   } else {
2765     for (i=0;i<n_local_dofs;i++) {
2766       array[local_dofs[i]]=1.0;
2767     }
2768   }
2769   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2770   /* scatter into global vec and get total number of global dofs */
2771   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2772   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2773   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2774   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2775   /* Fill global_vec with cumulative function for global numbering */
2776   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2777   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2778   nlocals = 0;
2779   first_index = -1;
2780   first_found = PETSC_FALSE;
2781   for (i=0;i<s;i++) {
2782     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2783       first_found = PETSC_TRUE;
2784       first_index = i;
2785     }
2786     nlocals += (PetscInt)PetscRealPart(array[i]);
2787   }
2788   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2789   if (!rank_prec_comm) {
2790     dof_displs[0]=0;
2791     for (i=1;i<size_prec_comm;i++) {
2792       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2793     }
2794   }
2795   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2796   if (first_found) {
2797     array[first_index] += (PetscScalar)nlocals;
2798     old_index = first_index;
2799     for (i=first_index+1;i<s;i++) {
2800       if (PetscRealPart(array[i]) > 0.1) {
2801         array[i] += array[old_index];
2802         old_index = i;
2803       }
2804     }
2805   }
2806   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2807   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2808   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2809   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2810   /* get global ordering of local dofs */
2811   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2812   if (local_dofs_mult) {
2813     for (i=0;i<n_local_dofs;i++) {
2814       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2815     }
2816   } else {
2817     for (i=0;i<n_local_dofs;i++) {
2818       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2819     }
2820   }
2821   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2822   /* free workspace */
2823   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2824   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2825   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2826   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2827   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2828   /* return pointer to global ordering of local dofs */
2829   *global_numbering_subset = temp_global_dofs;
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2835 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2836 {
2837   PetscInt       i,j;
2838   PetscScalar    *alphas;
2839   PetscErrorCode ierr;
2840 
2841   PetscFunctionBegin;
2842   /* this implements stabilized Gram-Schmidt */
2843   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2844   for (i=0;i<n;i++) {
2845     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2846     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2847     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2848   }
2849   ierr = PetscFree(alphas);CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatISGetSubassemblingPattern"
2855 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2856 {
2857   Mat             subdomain_adj;
2858   IS              new_ranks,ranks_send_to;
2859   MatPartitioning partitioner;
2860   Mat_IS          *matis;
2861   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2862   PetscInt        prank;
2863   PetscMPIInt     size,rank,color;
2864   PetscInt        *xadj,*adjncy,*oldranks;
2865   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2866   PetscInt        i,local_size,threshold=0;
2867   PetscErrorCode  ierr;
2868   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2869   PetscSubcomm    subcomm;
2870 
2871   PetscFunctionBegin;
2872   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2873   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2874   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2875 
2876   /* Get info on mapping */
2877   matis = (Mat_IS*)(mat->data);
2878   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2879   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2880 
2881   /* build local CSR graph of subdomains' connectivity */
2882   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2883   xadj[0] = 0;
2884   xadj[1] = PetscMax(n_neighs-1,0);
2885   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2886   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2887 
2888   if (threshold) {
2889     PetscInt xadj_count = 0;
2890     for (i=1;i<n_neighs;i++) {
2891       if (n_shared[i] > threshold) {
2892         adjncy[xadj_count] = neighs[i];
2893         adjncy_wgt[xadj_count] = n_shared[i];
2894         xadj_count++;
2895       }
2896     }
2897     xadj[1] = xadj_count;
2898   } else {
2899     if (xadj[1]) {
2900       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2901       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2902     }
2903   }
2904   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2905   if (use_square) {
2906     for (i=0;i<xadj[1];i++) {
2907       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2908     }
2909   }
2910   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2911 
2912   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2913 
2914   /*
2915     Restrict work on active processes only.
2916   */
2917   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2918   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2919   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2920   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2921   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2922   if (color) {
2923     ierr = PetscFree(xadj);CHKERRQ(ierr);
2924     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2925     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2926   } else {
2927     PetscInt coarsening_ratio;
2928     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2929     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2930     prank = rank;
2931     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2932     /*
2933     for (i=0;i<size;i++) {
2934       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2935     }
2936     */
2937     for (i=0;i<xadj[1];i++) {
2938       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2939     }
2940     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2941     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2942     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2943 
2944     /* Partition */
2945     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2946     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2947     if (use_vwgt) {
2948       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
2949       v_wgt[0] = local_size;
2950       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2951     }
2952     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2953     coarsening_ratio = size/n_subdomains;
2954     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2955     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2956     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2957     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2958 
2959     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2960     if (contiguous) {
2961       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2962     } else {
2963       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2964     }
2965     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2966     /* clean up */
2967     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2968     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2969     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2970     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2971   }
2972   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2973 
2974   /* assemble parallel IS for sends */
2975   i = 1;
2976   if (color) i=0;
2977   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2978 
2979   /* get back IS */
2980   *is_sends = ranks_send_to;
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2985 
2986 #undef __FUNCT__
2987 #define __FUNCT__ "MatISSubassemble"
2988 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2989 {
2990   Mat                    local_mat;
2991   Mat_IS                 *matis;
2992   IS                     is_sends_internal;
2993   PetscInt               rows,cols,new_local_rows;
2994   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2995   PetscBool              ismatis,isdense,newisdense,destroy_mat;
2996   ISLocalToGlobalMapping l2gmap;
2997   PetscInt*              l2gmap_indices;
2998   const PetscInt*        is_indices;
2999   MatType                new_local_type;
3000   /* buffers */
3001   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3002   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3003   PetscInt               *recv_buffer_idxs_local;
3004   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3005   /* MPI */
3006   MPI_Comm               comm,comm_n;
3007   PetscSubcomm           subcomm;
3008   PetscMPIInt            n_sends,n_recvs,commsize;
3009   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3010   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3011   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3012   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3013   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3014   PetscErrorCode         ierr;
3015 
3016   PetscFunctionBegin;
3017   /* TODO: add missing checks */
3018   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3019   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3020   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3021   PetscValidLogicalCollectiveInt(mat,nis,7);
3022   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3023   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3024   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3025   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3026   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3027   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3028   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3029   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3030     PetscInt mrows,mcols,mnrows,mncols;
3031     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3032     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3033     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3034     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3035     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3036     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3037   }
3038   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3039   PetscValidLogicalCollectiveInt(mat,bs,0);
3040   /* prepare IS for sending if not provided */
3041   if (!is_sends) {
3042     PetscBool pcontig = PETSC_TRUE;
3043     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3044     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3045   } else {
3046     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3047     is_sends_internal = is_sends;
3048   }
3049 
3050   /* get pointer of MATIS data */
3051   matis = (Mat_IS*)mat->data;
3052 
3053   /* get comm */
3054   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3055 
3056   /* compute number of sends */
3057   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3058   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3059 
3060   /* compute number of receives */
3061   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3062   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3063   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3064   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3065   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3066   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3067   ierr = PetscFree(iflags);CHKERRQ(ierr);
3068 
3069   /* restrict comm if requested */
3070   subcomm = 0;
3071   destroy_mat = PETSC_FALSE;
3072   if (restrict_comm) {
3073     PetscMPIInt color,rank,subcommsize;
3074     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3075     color = 0;
3076     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3077     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3078     subcommsize = commsize - subcommsize;
3079     /* check if reuse has been requested */
3080     if (reuse == MAT_REUSE_MATRIX) {
3081       if (*mat_n) {
3082         PetscMPIInt subcommsize2;
3083         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3084         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3085         comm_n = PetscObjectComm((PetscObject)*mat_n);
3086       } else {
3087         comm_n = PETSC_COMM_SELF;
3088       }
3089     } else { /* MAT_INITIAL_MATRIX */
3090       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3091       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3092       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3093       comm_n = subcomm->comm;
3094     }
3095     /* flag to destroy *mat_n if not significative */
3096     if (color) destroy_mat = PETSC_TRUE;
3097   } else {
3098     comm_n = comm;
3099   }
3100 
3101   /* prepare send/receive buffers */
3102   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3103   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3104   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3105   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3106   if (nis) {
3107     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3108   }
3109 
3110   /* Get data from local matrices */
3111   if (!isdense) {
3112     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3113     /* TODO: See below some guidelines on how to prepare the local buffers */
3114     /*
3115        send_buffer_vals should contain the raw values of the local matrix
3116        send_buffer_idxs should contain:
3117        - MatType_PRIVATE type
3118        - PetscInt        size_of_l2gmap
3119        - PetscInt        global_row_indices[size_of_l2gmap]
3120        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3121     */
3122   } else {
3123     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3124     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3125     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3126     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3127     send_buffer_idxs[1] = i;
3128     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3129     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3130     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3131     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3132     for (i=0;i<n_sends;i++) {
3133       ilengths_vals[is_indices[i]] = len*len;
3134       ilengths_idxs[is_indices[i]] = len+2;
3135     }
3136   }
3137   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3138   /* additional is (if any) */
3139   if (nis) {
3140     PetscMPIInt psum;
3141     PetscInt j;
3142     for (j=0,psum=0;j<nis;j++) {
3143       PetscInt plen;
3144       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3145       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3146       psum += len+1; /* indices + lenght */
3147     }
3148     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3149     for (j=0,psum=0;j<nis;j++) {
3150       PetscInt plen;
3151       const PetscInt *is_array_idxs;
3152       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3153       send_buffer_idxs_is[psum] = plen;
3154       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3155       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3156       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3157       psum += plen+1; /* indices + lenght */
3158     }
3159     for (i=0;i<n_sends;i++) {
3160       ilengths_idxs_is[is_indices[i]] = psum;
3161     }
3162     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3163   }
3164 
3165   buf_size_idxs = 0;
3166   buf_size_vals = 0;
3167   buf_size_idxs_is = 0;
3168   for (i=0;i<n_recvs;i++) {
3169     buf_size_idxs += (PetscInt)olengths_idxs[i];
3170     buf_size_vals += (PetscInt)olengths_vals[i];
3171     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3172   }
3173   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3174   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3175   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3176 
3177   /* get new tags for clean communications */
3178   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3179   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3180   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3181 
3182   /* allocate for requests */
3183   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3184   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3185   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3186   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3187   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3188   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3189 
3190   /* communications */
3191   ptr_idxs = recv_buffer_idxs;
3192   ptr_vals = recv_buffer_vals;
3193   ptr_idxs_is = recv_buffer_idxs_is;
3194   for (i=0;i<n_recvs;i++) {
3195     source_dest = onodes[i];
3196     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3197     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3198     ptr_idxs += olengths_idxs[i];
3199     ptr_vals += olengths_vals[i];
3200     if (nis) {
3201       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3202       ptr_idxs_is += olengths_idxs_is[i];
3203     }
3204   }
3205   for (i=0;i<n_sends;i++) {
3206     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3207     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3208     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3209     if (nis) {
3210       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3211     }
3212   }
3213   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3214   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3215 
3216   /* assemble new l2g map */
3217   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3218   ptr_idxs = recv_buffer_idxs;
3219   new_local_rows = 0;
3220   for (i=0;i<n_recvs;i++) {
3221     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3222     ptr_idxs += olengths_idxs[i];
3223   }
3224   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3225   ptr_idxs = recv_buffer_idxs;
3226   new_local_rows = 0;
3227   for (i=0;i<n_recvs;i++) {
3228     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3229     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3230     ptr_idxs += olengths_idxs[i];
3231   }
3232   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3233   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3234   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3235 
3236   /* infer new local matrix type from received local matrices type */
3237   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3238   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3239   if (n_recvs) {
3240     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3241     ptr_idxs = recv_buffer_idxs;
3242     for (i=0;i<n_recvs;i++) {
3243       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3244         new_local_type_private = MATAIJ_PRIVATE;
3245         break;
3246       }
3247       ptr_idxs += olengths_idxs[i];
3248     }
3249     switch (new_local_type_private) {
3250       case MATDENSE_PRIVATE:
3251         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3252           new_local_type = MATSEQAIJ;
3253           bs = 1;
3254         } else { /* if I receive only 1 dense matrix */
3255           new_local_type = MATSEQDENSE;
3256           bs = 1;
3257         }
3258         break;
3259       case MATAIJ_PRIVATE:
3260         new_local_type = MATSEQAIJ;
3261         bs = 1;
3262         break;
3263       case MATBAIJ_PRIVATE:
3264         new_local_type = MATSEQBAIJ;
3265         break;
3266       case MATSBAIJ_PRIVATE:
3267         new_local_type = MATSEQSBAIJ;
3268         break;
3269       default:
3270         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3271         break;
3272     }
3273   } else { /* by default, new_local_type is seqdense */
3274     new_local_type = MATSEQDENSE;
3275     bs = 1;
3276   }
3277 
3278   /* create MATIS object if needed */
3279   if (reuse == MAT_INITIAL_MATRIX) {
3280     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3281     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3282   } else {
3283     /* it also destroys the local matrices */
3284     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3285   }
3286   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3287   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3288 
3289   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3290 
3291   /* Global to local map of received indices */
3292   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3293   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3294   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3295 
3296   /* restore attributes -> type of incoming data and its size */
3297   buf_size_idxs = 0;
3298   for (i=0;i<n_recvs;i++) {
3299     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3300     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3301     buf_size_idxs += (PetscInt)olengths_idxs[i];
3302   }
3303   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3304 
3305   /* set preallocation */
3306   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3307   if (!newisdense) {
3308     PetscInt *new_local_nnz=0;
3309 
3310     ptr_vals = recv_buffer_vals;
3311     ptr_idxs = recv_buffer_idxs_local;
3312     if (n_recvs) {
3313       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3314     }
3315     for (i=0;i<n_recvs;i++) {
3316       PetscInt j;
3317       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3318         for (j=0;j<*(ptr_idxs+1);j++) {
3319           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3320         }
3321       } else {
3322         /* TODO */
3323       }
3324       ptr_idxs += olengths_idxs[i];
3325     }
3326     if (new_local_nnz) {
3327       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3328       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3329       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3330       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3331       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3332       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3333     } else {
3334       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3335     }
3336     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3337   } else {
3338     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3339   }
3340 
3341   /* set values */
3342   ptr_vals = recv_buffer_vals;
3343   ptr_idxs = recv_buffer_idxs_local;
3344   for (i=0;i<n_recvs;i++) {
3345     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3346       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3347       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3348       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3349       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3350       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3351     } else {
3352       /* TODO */
3353     }
3354     ptr_idxs += olengths_idxs[i];
3355     ptr_vals += olengths_vals[i];
3356   }
3357   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3358   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3359   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3360   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3361   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3362   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3363 
3364 #if 0
3365   if (!restrict_comm) { /* check */
3366     Vec       lvec,rvec;
3367     PetscReal infty_error;
3368 
3369     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3370     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3371     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3372     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3373     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3374     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3375     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3376     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3377     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3378   }
3379 #endif
3380 
3381   /* assemble new additional is (if any) */
3382   if (nis) {
3383     PetscInt **temp_idxs,*count_is,j,psum;
3384 
3385     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3386     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3387     ptr_idxs = recv_buffer_idxs_is;
3388     psum = 0;
3389     for (i=0;i<n_recvs;i++) {
3390       for (j=0;j<nis;j++) {
3391         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3392         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3393         psum += plen;
3394         ptr_idxs += plen+1; /* shift pointer to received data */
3395       }
3396     }
3397     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3398     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3399     for (i=1;i<nis;i++) {
3400       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3401     }
3402     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3403     ptr_idxs = recv_buffer_idxs_is;
3404     for (i=0;i<n_recvs;i++) {
3405       for (j=0;j<nis;j++) {
3406         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3407         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3408         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3409         ptr_idxs += plen+1; /* shift pointer to received data */
3410       }
3411     }
3412     for (i=0;i<nis;i++) {
3413       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3414       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3415       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3416     }
3417     ierr = PetscFree(count_is);CHKERRQ(ierr);
3418     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3419     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3420   }
3421   /* free workspace */
3422   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3423   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3424   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3425   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3426   if (isdense) {
3427     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3428     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3429   } else {
3430     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3431   }
3432   if (nis) {
3433     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3434     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3435   }
3436   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3437   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3438   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3439   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3440   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3441   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3442   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3443   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3444   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3445   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3446   ierr = PetscFree(onodes);CHKERRQ(ierr);
3447   if (nis) {
3448     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3449     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3450     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3451   }
3452   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3453   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3454     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3455     for (i=0;i<nis;i++) {
3456       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3457     }
3458   }
3459   PetscFunctionReturn(0);
3460 }
3461 
3462 /* temporary hack into ksp private data structure */
3463 #include <petsc-private/kspimpl.h>
3464 
3465 #undef __FUNCT__
3466 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3467 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3468 {
3469   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3470   PC_IS                  *pcis = (PC_IS*)pc->data;
3471   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3472   MatNullSpace           CoarseNullSpace=NULL;
3473   ISLocalToGlobalMapping coarse_islg;
3474   IS                     coarse_is,*isarray;
3475   PetscInt               i,im_active=-1,active_procs=-1;
3476   PetscInt               nis,nisdofs,nisneu;
3477   PC                     pc_temp;
3478   PCType                 coarse_pc_type;
3479   KSPType                coarse_ksp_type;
3480   PetscBool              multilevel_requested,multilevel_allowed;
3481   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3482   Mat                    t_coarse_mat_is;
3483   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3484   PetscMPIInt            all_procs;
3485   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3486   PetscBool              compute_vecs = PETSC_FALSE;
3487   PetscScalar            *array;
3488   PetscErrorCode         ierr;
3489 
3490   PetscFunctionBegin;
3491   /* Assign global numbering to coarse dofs */
3492   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3493     compute_vecs = PETSC_TRUE;
3494     PetscInt ocoarse_size;
3495     ocoarse_size = pcbddc->coarse_size;
3496     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3497     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3498     /* see if we can avoid some work */
3499     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3500       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3501         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3502         coarse_reuse = PETSC_FALSE;
3503       } else { /* we can safely reuse already computed coarse matrix */
3504         coarse_reuse = PETSC_TRUE;
3505       }
3506     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3507       coarse_reuse = PETSC_FALSE;
3508     }
3509     /* reset any subassembling information */
3510     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3511     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3512   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3513     coarse_reuse = PETSC_TRUE;
3514   }
3515 
3516   /* count "active" (i.e. with positive local size) and "void" processes */
3517   im_active = !!(pcis->n);
3518   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3519   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3520   void_procs = all_procs-active_procs;
3521   csin_type_simple = PETSC_TRUE;
3522   redist = PETSC_FALSE;
3523   if (pcbddc->current_level && void_procs) {
3524     csin_ml = PETSC_TRUE;
3525     ncoarse_ml = void_procs;
3526     csin_ds = PETSC_TRUE;
3527     ncoarse_ds = void_procs;
3528   } else {
3529     csin_ml = PETSC_FALSE;
3530     ncoarse_ml = all_procs;
3531     if (void_procs) {
3532       csin_ds = PETSC_TRUE;
3533       ncoarse_ds = void_procs;
3534       csin_type_simple = PETSC_FALSE;
3535     } else {
3536       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3537         csin_ds = PETSC_TRUE;
3538         ncoarse_ds = pcbddc->redistribute_coarse;
3539         redist = PETSC_TRUE;
3540       } else {
3541         csin_ds = PETSC_FALSE;
3542         ncoarse_ds = all_procs;
3543       }
3544     }
3545   }
3546 
3547   /*
3548     test if we can go multilevel: three conditions must be satisfied:
3549     - we have not exceeded the number of levels requested
3550     - we can actually subassemble the active processes
3551     - we can find a suitable number of MPI processes where we can place the subassembled problem
3552   */
3553   multilevel_allowed = PETSC_FALSE;
3554   multilevel_requested = PETSC_FALSE;
3555   if (pcbddc->current_level < pcbddc->max_levels) {
3556     multilevel_requested = PETSC_TRUE;
3557     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3558       multilevel_allowed = PETSC_FALSE;
3559     } else {
3560       multilevel_allowed = PETSC_TRUE;
3561     }
3562   }
3563   /* determine number of process partecipating to coarse solver */
3564   if (multilevel_allowed) {
3565     ncoarse = ncoarse_ml;
3566     csin = csin_ml;
3567   } else {
3568     ncoarse = ncoarse_ds;
3569     csin = csin_ds;
3570   }
3571 
3572   /* creates temporary l2gmap and IS for coarse indexes */
3573   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3574   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3575 
3576   /* creates temporary MATIS object for coarse matrix */
3577   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3578   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3579   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3580   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3581 #if 0
3582   {
3583     PetscViewer viewer;
3584     char filename[256];
3585     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3586     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3587     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3588     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3589     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3590   }
3591 #endif
3592   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3593   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3594   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3595   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3596   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3597 
3598   /* compute dofs splitting and neumann boundaries for coarse dofs */
3599   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3600     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3601     const PetscInt         *idxs;
3602     ISLocalToGlobalMapping tmap;
3603 
3604     /* create map between primal indices (in local representative ordering) and local primal numbering */
3605     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3606     /* allocate space for temporary storage */
3607     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3608     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3609     /* allocate for IS array */
3610     nisdofs = pcbddc->n_ISForDofsLocal;
3611     nisneu = !!pcbddc->NeumannBoundariesLocal;
3612     nis = nisdofs + nisneu;
3613     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3614     /* dofs splitting */
3615     for (i=0;i<nisdofs;i++) {
3616       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3617       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3618       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3619       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3620       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3621       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3622       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3623       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3624     }
3625     /* neumann boundaries */
3626     if (pcbddc->NeumannBoundariesLocal) {
3627       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3628       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3629       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3630       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3631       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3632       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3633       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3634       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3635     }
3636     /* free memory */
3637     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3638     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3639     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3640   } else {
3641     nis = 0;
3642     nisdofs = 0;
3643     nisneu = 0;
3644     isarray = NULL;
3645   }
3646   /* destroy no longer needed map */
3647   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3648 
3649   /* restrict on coarse candidates (if needed) */
3650   coarse_mat_is = NULL;
3651   if (csin) {
3652     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3653       if (redist) {
3654         PetscMPIInt rank;
3655         PetscInt spc,n_spc_p1,dest[1];
3656 
3657         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3658         spc = all_procs/pcbddc->redistribute_coarse;
3659         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3660         if (rank > n_spc_p1*(spc+1)-1) {
3661           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3662         } else {
3663           dest[0] = rank/(spc+1);
3664         }
3665         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3666       } else {
3667         PetscInt j,tissize,*nisindices;
3668         PetscInt *coarse_candidates;
3669         const PetscInt* tisindices;
3670         /* get coarse candidates' ranks in pc communicator */
3671         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3672         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3673         for (i=0,j=0;i<all_procs;i++) {
3674           if (!coarse_candidates[i]) {
3675             coarse_candidates[j]=i;
3676             j++;
3677           }
3678         }
3679         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3680         /* get a suitable subassembling pattern */
3681         if (csin_type_simple) {
3682           PetscMPIInt rank;
3683           PetscInt    issize,isidx;
3684           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3685           if (im_active) {
3686             issize = 1;
3687             isidx = (PetscInt)rank;
3688           } else {
3689             issize = 0;
3690             isidx = -1;
3691           }
3692           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3693         } else {
3694           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3695         }
3696         if (pcbddc->dbg_flag) {
3697           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3698           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3699           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3700           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3701           for (i=0;i<j;i++) {
3702             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3703           }
3704           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3705           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3706         }
3707         /* shift the pattern on coarse candidates */
3708         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3709         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3710         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3711         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3712         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3713         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3714         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3715       }
3716     }
3717     if (pcbddc->dbg_flag) {
3718       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3719       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3720       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3721       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3722     }
3723     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3724     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3725   } else {
3726     if (pcbddc->dbg_flag) {
3727       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3728       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3729       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3730     }
3731     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3732     coarse_mat_is = t_coarse_mat_is;
3733   }
3734 
3735   /* create local to global scatters for coarse problem */
3736   if (compute_vecs) {
3737     PetscInt lrows;
3738     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3739     if (coarse_mat_is) {
3740       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3741     } else {
3742       lrows = 0;
3743     }
3744     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3745     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3746     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3747     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3748     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3749   }
3750   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3751   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3752 
3753   /* set defaults for coarse KSP and PC */
3754   if (multilevel_allowed) {
3755     coarse_ksp_type = KSPRICHARDSON;
3756     coarse_pc_type = PCBDDC;
3757   } else {
3758     coarse_ksp_type = KSPPREONLY;
3759     coarse_pc_type = PCREDUNDANT;
3760   }
3761 
3762   /* print some info if requested */
3763   if (pcbddc->dbg_flag) {
3764     if (!multilevel_allowed) {
3765       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3766       if (multilevel_requested) {
3767         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3768       } else if (pcbddc->max_levels) {
3769         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3770       }
3771       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3772     }
3773   }
3774 
3775   /* create the coarse KSP object only once with defaults */
3776   if (coarse_mat_is) {
3777     MatReuse coarse_mat_reuse;
3778     PetscViewer dbg_viewer = NULL;
3779     if (pcbddc->dbg_flag) {
3780       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3781       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3782     }
3783     if (!pcbddc->coarse_ksp) {
3784       char prefix[256],str_level[16];
3785       size_t len;
3786       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3787       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3788       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3789       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3790       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3791       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3792       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3793       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3794       /* prefix */
3795       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3796       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3797       if (!pcbddc->current_level) {
3798         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3799         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3800       } else {
3801         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3802         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3803         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3804         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3805         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3806         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3807       }
3808       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3809       /* allow user customization */
3810       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3811       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3812     }
3813 
3814     /* get some info after set from options */
3815     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3816     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3817     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3818     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3819     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3820       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3821       isbddc = PETSC_FALSE;
3822     }
3823     if (isredundant) {
3824       KSP inner_ksp;
3825       PC inner_pc;
3826       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3827       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3828       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3829     }
3830 
3831     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3832     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3833     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3834     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3835     if (nisdofs) {
3836       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3837       for (i=0;i<nisdofs;i++) {
3838         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3839       }
3840     }
3841     if (nisneu) {
3842       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3843       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3844     }
3845 
3846     /* assemble coarse matrix */
3847     if (coarse_reuse) {
3848       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3849       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3850       coarse_mat_reuse = MAT_REUSE_MATRIX;
3851     } else {
3852       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3853     }
3854     if (isbddc || isnn) {
3855       if (pcbddc->coarsening_ratio > 1) {
3856         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3857           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3858           if (pcbddc->dbg_flag) {
3859             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3860             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3861             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3862             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3863           }
3864         }
3865         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3866       } else {
3867         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3868         coarse_mat = coarse_mat_is;
3869       }
3870     } else {
3871       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3872     }
3873     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3874 
3875     /* propagate symmetry info to coarse matrix */
3876     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3877     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3878 
3879     /* set operators */
3880     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3881     if (pcbddc->dbg_flag) {
3882       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3883     }
3884   } else { /* processes non partecipating to coarse solver (if any) */
3885     coarse_mat = 0;
3886   }
3887   ierr = PetscFree(isarray);CHKERRQ(ierr);
3888 #if 0
3889   {
3890     PetscViewer viewer;
3891     char filename[256];
3892     sprintf(filename,"coarse_mat.m");
3893     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3894     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3895     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3896     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3897   }
3898 #endif
3899 
3900   /* Compute coarse null space (special handling by BDDC only) */
3901   if (pcbddc->NullSpace) {
3902     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3903   }
3904 
3905   if (pcbddc->coarse_ksp) {
3906     Vec crhs,csol;
3907     PetscBool ispreonly;
3908     if (CoarseNullSpace) {
3909       if (isbddc) {
3910         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3911       } else {
3912         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3913       }
3914     }
3915     /* setup coarse ksp */
3916     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3917     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3918     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3919     /* hack */
3920     if (!csol) {
3921       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3922     }
3923     if (!crhs) {
3924       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3925     }
3926     /* Check coarse problem if in debug mode or if solving with an iterative method */
3927     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3928     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3929       KSP       check_ksp;
3930       KSPType   check_ksp_type;
3931       PC        check_pc;
3932       Vec       check_vec,coarse_vec;
3933       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3934       PetscInt  its;
3935       PetscBool compute_eigs;
3936       PetscReal *eigs_r,*eigs_c;
3937       PetscInt  neigs;
3938       const char *prefix;
3939 
3940       /* Create ksp object suitable for estimation of extreme eigenvalues */
3941       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3942       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3943       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3944       if (ispreonly) {
3945         check_ksp_type = KSPPREONLY;
3946         compute_eigs = PETSC_FALSE;
3947       } else {
3948         check_ksp_type = KSPGMRES;
3949         compute_eigs = PETSC_TRUE;
3950       }
3951       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3952       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3953       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3954       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3955       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3956       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3957       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3958       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3959       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3960       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3961       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3962       /* create random vec */
3963       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3964       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3965       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3966       if (CoarseNullSpace) {
3967         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3968       }
3969       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3970       /* solve coarse problem */
3971       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3972       if (CoarseNullSpace) {
3973         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3974       }
3975       /* set eigenvalue estimation if preonly has not been requested */
3976       if (compute_eigs) {
3977         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
3978         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
3979         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3980         lambda_max = eigs_r[neigs-1];
3981         lambda_min = eigs_r[0];
3982         if (pcbddc->use_coarse_estimates) {
3983           if (lambda_max>lambda_min) {
3984             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3985             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3986           }
3987         }
3988       }
3989 
3990       /* check coarse problem residual error */
3991       if (pcbddc->dbg_flag) {
3992         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3993         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3994         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3995         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3996         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3997         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3998         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3999         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4000         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4001         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4002         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4003         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4004         if (compute_eigs) {
4005           PetscReal lambda_max_s,lambda_min_s;
4006           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4007           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4008           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4009           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4010           for (i=0;i<neigs;i++) {
4011             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4012           }
4013         }
4014         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4015         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4016       }
4017       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4018       if (compute_eigs) {
4019         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4020         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4021       }
4022     }
4023   }
4024   /* print additional info */
4025   if (pcbddc->dbg_flag) {
4026     /* waits until all processes reaches this point */
4027     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4028     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4029     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4030   }
4031 
4032   /* free memory */
4033   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4034   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4035   PetscFunctionReturn(0);
4036 }
4037 
4038 #undef __FUNCT__
4039 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4040 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4041 {
4042   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4043   PC_IS*         pcis = (PC_IS*)pc->data;
4044   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4045   PetscInt       i,coarse_size;
4046   PetscInt       *local_primal_indices;
4047   PetscErrorCode ierr;
4048 
4049   PetscFunctionBegin;
4050   /* Compute global number of coarse dofs */
4051   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4052     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4053   }
4054   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4055 
4056   /* check numbering */
4057   if (pcbddc->dbg_flag) {
4058     PetscScalar coarsesum,*array;
4059     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4060 
4061     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4062     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4063     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4064     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4065     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4066     for (i=0;i<pcbddc->local_primal_size;i++) {
4067       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4068     }
4069     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4070     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4071     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4072     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4073     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4074     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4075     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4076     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4077     for (i=0;i<pcis->n;i++) {
4078       if (array[i] == 1.0) {
4079         set_error = PETSC_TRUE;
4080         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4081       }
4082     }
4083     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4084     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4085     for (i=0;i<pcis->n;i++) {
4086       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4087     }
4088     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4089     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4090     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4091     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4092     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4093     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4094     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4095       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4096       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4097       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4098       for (i=0;i<pcbddc->local_primal_size;i++) {
4099         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4100       }
4101       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4102     }
4103     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4104     if (set_error_reduced) {
4105       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4106     }
4107   }
4108   /* get back data */
4109   *coarse_size_n = coarse_size;
4110   *local_primal_indices_n = local_primal_indices;
4111   PetscFunctionReturn(0);
4112 }
4113 
4114 #undef __FUNCT__
4115 #define __FUNCT__ "PCBDDCGlobalToLocal"
4116 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4117 {
4118   IS             localis_t;
4119   PetscInt       i,lsize,*idxs,n;
4120   PetscScalar    *vals;
4121   PetscErrorCode ierr;
4122 
4123   PetscFunctionBegin;
4124   /* get indices in local ordering exploiting local to global map */
4125   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4126   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4127   for (i=0;i<lsize;i++) vals[i] = 1.0;
4128   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4129   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4130   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4131   if (idxs) { /* multilevel guard */
4132     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4133   }
4134   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4135   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4136   ierr = PetscFree(vals);CHKERRQ(ierr);
4137   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4138   /* now compute set in local ordering */
4139   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4140   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4141   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4142   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4143   for (i=0,lsize=0;i<n;i++) {
4144     if (PetscRealPart(vals[i]) > 0.5) {
4145       lsize++;
4146     }
4147   }
4148   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4149   for (i=0,lsize=0;i<n;i++) {
4150     if (PetscRealPart(vals[i]) > 0.5) {
4151       idxs[lsize++] = i;
4152     }
4153   }
4154   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4155   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4156   *localis = localis_t;
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4161 #undef __FUNCT__
4162 #define __FUNCT__ "PCBDDCMatMult_Private"
4163 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4164 {
4165   PCBDDCChange_ctx change_ctx;
4166   PetscErrorCode   ierr;
4167 
4168   PetscFunctionBegin;
4169   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4170   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4171   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4172   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4173   PetscFunctionReturn(0);
4174 }
4175 
4176 #undef __FUNCT__
4177 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4178 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4179 {
4180   PCBDDCChange_ctx change_ctx;
4181   PetscErrorCode   ierr;
4182 
4183   PetscFunctionBegin;
4184   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4185   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4186   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4187   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4188   PetscFunctionReturn(0);
4189 }
4190