xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 9b28b3ffae45aa8f1034174f2416a0bca19e1ef2)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Compute matrix after change of basis and extract local submatrices */
18   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D */
21   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
22   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
23 
24   /* Setup local solvers ksp_D and ksp_R */
25   /* PCBDDCSetUpLocalScatters should be called first! */
26   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
27 
28   /* Change global null space passed in by the user if change of basis has been requested */
29   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
30     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
31   }
32 
33   /*
34      Setup local correction and local part of coarse basis.
35      Gives back the dense local part of the coarse matrix in column major ordering
36   */
37   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* Compute total number of coarse nodes and setup coarse solver */
40   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
41 
42   /* free */
43   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 #undef __FUNCT__
48 #define __FUNCT__ "PCBDDCResetCustomization"
49 PetscErrorCode PCBDDCResetCustomization(PC pc)
50 {
51   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
61   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
62   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
63   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
64   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
65   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
79   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
80   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
81   PetscFunctionReturn(0);
82 }
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "PCBDDCResetSolvers"
86 PetscErrorCode PCBDDCResetSolvers(PC pc)
87 {
88   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
93   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
101   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
109   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
110   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
111   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
113   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
114   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
115   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
122 {
123   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
124   PC_IS          *pcis = (PC_IS*)pc->data;
125   VecType        impVecType;
126   PetscInt       n_constraints,n_R,old_size;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   if (!pcbddc->ConstraintMatrix) {
131     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
132   }
133   /* get sizes */
134   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
135   n_R = pcis->n-pcbddc->n_actual_vertices;
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   /* local work vectors (try to avoid unneeded work)*/
138   /* R nodes */
139   old_size = -1;
140   if (pcbddc->vec1_R) {
141     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
142   }
143   if (n_R != old_size) {
144     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
149     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
150   }
151   /* local primal dofs */
152   old_size = -1;
153   if (pcbddc->vec1_P) {
154     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
155   }
156   if (pcbddc->local_primal_size != old_size) {
157     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
158     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
159     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
160     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
161   }
162   /* local explicit constraints */
163   old_size = -1;
164   if (pcbddc->vec1_C) {
165     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
166   }
167   if (n_constraints && n_constraints != old_size) {
168     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
169     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
170     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
171     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "PCBDDCSetUpCorrection"
178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
179 {
180   PetscErrorCode         ierr;
181   /* pointers to pcis and pcbddc */
182   PC_IS*                 pcis = (PC_IS*)pc->data;
183   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
184   /* submatrices of local problem */
185   Mat                    A_RV,A_VR,A_VV;
186   /* working matrices */
187   Mat                    M1,M2,M3,C_CR;
188   /* working vectors */
189   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
190   /* additional working stuff */
191   IS                     is_aux;
192   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
193   const PetscScalar      *array,*row_cmat_values;
194   const PetscInt         *row_cmat_indices,*idx_R_local;
195   PetscInt               *idx_V_B,*auxindices;
196   PetscInt               n_vertices,n_constraints,size_of_constraint;
197   PetscInt               i,j,n_R,n_D,n_B;
198   PetscBool              unsymmetric_check;
199   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
200   MatType                impMatType;
201   /* some shortcuts to scalars */
202   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
203   /* for debugging purposes */
204   PetscReal              *coarsefunctions_errors,*constraints_errors;
205 
206   PetscFunctionBegin;
207   /* get number of vertices (corners plus constraints with change of basis)
208      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
209   n_vertices = pcbddc->n_actual_vertices;
210   n_constraints = pcbddc->local_primal_size-n_vertices;
211   /* Set Non-overlapping dimensions */
212   n_B = pcis->n_B; n_D = pcis->n - n_B;
213   n_R = pcis->n-n_vertices;
214 
215   /* Set types for local objects needed by BDDC precondtioner */
216   impMatType = MATSEQDENSE;
217 
218   /* Allocating some extra storage just to be safe */
219   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
220   for (i=0;i<pcis->n;i++) auxindices[i]=i;
221 
222   /* vertices in boundary numbering */
223   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
224   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
225   if (i != n_vertices) {
226     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
227   }
228 
229   /* Precompute stuffs needed for preprocessing and application of BDDC*/
230   if (n_constraints) {
231     /* see if we can save some allocations */
232     if (pcbddc->local_auxmat2) {
233       PetscInt on_R,on_constraints;
234       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
235       if (on_R != n_R || on_constraints != n_constraints) {
236         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
237         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
238       }
239     }
240     /* work vectors */
241     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
242     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
243     /* auxiliary matrices */
244     if (!pcbddc->local_auxmat2) {
245       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
246       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
247       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
248       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
249     }
250 
251     /* Extract constraints on R nodes: C_{CR}  */
252     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
253     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
254     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
255 
256     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
257     for (i=0;i<n_constraints;i++) {
258       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
259       /* Get row of constraint matrix in R numbering */
260       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
262       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
263       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
264       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
265       /* Solve for row of constraint matrix in R numbering */
266       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
267       /* Set values in local_auxmat2 */
268       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
270       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
271     }
272     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
275 
276     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
277     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
278     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
279     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
280     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
281     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
282     ierr = MatSetUp(M1);CHKERRQ(ierr);
283     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
284     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
285     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
286     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
287     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
288     ierr = MatDestroy(&M2);CHKERRQ(ierr);
289     ierr = MatDestroy(&M3);CHKERRQ(ierr);
290     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
291     if (!pcbddc->local_auxmat1) {
292       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     } else {
294       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
295     }
296   }
297 
298   /* Get submatrices from subdomain matrix */
299   if (n_vertices) {
300     PetscInt ibs,mbs;
301     PetscBool issbaij;
302     Mat newmat;
303 
304     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
305     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
306     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
307     if (ibs != mbs) { /* need to convert to SEQAIJ */
308       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
311       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
313     } else {
314       /* this is safe */
315       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
316       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
317       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
318         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
319         /* which of the two approaches is faster? */
320         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
322         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
324         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
325       } else {
326         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
327         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
328       }
329     }
330     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
331     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
332     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
333   }
334 
335   /* Matrix of coarse basis functions (local) */
336   if (pcbddc->coarse_phi_B) {
337     PetscInt on_B,on_primal;
338     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
339     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
340       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
341       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
342     }
343   }
344   if (pcbddc->coarse_phi_D) {
345     PetscInt on_D,on_primal;
346     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
347     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
348       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
349       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
350     }
351   }
352   if (!pcbddc->coarse_phi_B) {
353     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
354     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
355     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
356     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
357   }
358   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
359     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
360     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
361     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
362     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
363   }
364 
365   if (pcbddc->dbg_flag) {
366     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
367     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
368     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
369   }
370   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
371   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
372 
373   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
374 
375   /* vertices */
376   for (i=0;i<n_vertices;i++) {
377     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
378     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
379     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
380     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
381     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
382     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
383     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
384     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
385     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
387     if (n_constraints) {
388       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
389       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
390       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
391     }
392     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
393     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
394 
395     /* Set values in coarse basis function and subdomain part of coarse_mat */
396     /* coarse basis functions */
397     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
398     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
399     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
402     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
404     if (pcbddc->switch_static || pcbddc->dbg_flag) {
405       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
409       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410     }
411     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
412     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
414     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
415     if (n_constraints) {
416       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
417       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
418       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     }
420 
421     /* check */
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on local nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
428       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
429       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
430       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
431       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
432       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
433       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
434       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
435       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       if (n_constraints) {
437         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
438         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
439         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
440       }
441       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
443       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
444       /* check saddle point solution */
445       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
447       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
448       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
449       /* shift by the identity matrix */
450       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
451       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
453       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
454     }
455   }
456 
457   /* constraints */
458   for (i=0;i<n_constraints;i++) {
459     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
460     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
461     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
462     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
463     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
464     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
465     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
466     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
467     if (n_vertices) {
468       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
469     }
470     /* Set values in coarse basis function and subdomain part of coarse_mat */
471     /* coarse basis functions */
472     j = i+n_vertices; /* don't touch this! */
473     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
474     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
477     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
478     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
481       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
482       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
483       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
484       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
485     }
486     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
487     if (n_vertices) {
488       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
489       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
490       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
491     }
492     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
493     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
494     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
495 
496     if (pcbddc->dbg_flag) {
497       /* assemble subdomain vector on nodes */
498       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
499       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
500       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
501       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
502       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
503       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
504       /* assemble subdomain vector of lagrange multipliers */
505       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
506       if (n_vertices) {
507         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
508         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
509         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
510       }
511       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
512       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
513       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
514       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
515       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
516       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
517       /* check saddle point solution */
518       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
519       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
520       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
521       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
522       /* shift by the identity matrix */
523       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
524       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
525       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
526       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
527     }
528   }
529   /* call assembling routines for local coarse basis */
530   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532   if (pcbddc->switch_static || pcbddc->dbg_flag) {
533     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535   }
536 
537   /* compute other basis functions for non-symmetric problems */
538   ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr);
539   if (!pcbddc->issym) {
540     if (!pcbddc->coarse_psi_B) {
541       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
542       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
543       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
544       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
545     }
546     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
547       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
548       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
549       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
550       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
551     }
552     for (i=0;i<pcbddc->local_primal_size;i++) {
553       if (n_constraints) {
554         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
555         for (j=0;j<n_constraints;j++) {
556           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
557         }
558         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
559         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
560       }
561       if (i<n_vertices) {
562         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
563         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
564         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
565         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
566         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
567         if (n_constraints) {
568           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
569         }
570       } else {
571         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
572       }
573       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
574       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
575       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
576       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
577       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
578       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
579       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
580       if (i<n_vertices) {
581         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
582       }
583       if (pcbddc->switch_static || pcbddc->dbg_flag) {
584         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
585         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
586         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
587         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
588         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
589       }
590 
591       if (pcbddc->dbg_flag) {
592         /* assemble subdomain vector on nodes */
593         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
594         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
595         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
596         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
597         if (i<n_vertices) {
598           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
599         }
600         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
601         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
602         /* assemble subdomain vector of lagrange multipliers */
603         for (j=0;j<pcbddc->local_primal_size;j++) {
604           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
605         }
606         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
607         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
608         /* check saddle point solution */
609         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
610         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
611         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
612         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
613         /* shift by the identity matrix */
614         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
615         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
616         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
617         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
618       }
619     }
620     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622     if (pcbddc->switch_static || pcbddc->dbg_flag) {
623       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
624       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
625     }
626     unsymmetric_check = PETSC_TRUE;
627   } else { /* take references to already computed coarse basis */
628     unsymmetric_check = PETSC_FALSE;
629     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
630     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
631     if (pcbddc->coarse_phi_D) {
632       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
633       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
634     }
635   }
636   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
637   /* Checking coarse_sub_mat and coarse basis functios */
638   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
639   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
640   if (pcbddc->dbg_flag) {
641     Mat         coarse_sub_mat;
642     Mat         AUXMAT,TM1,TM2,TM3,TM4;
643     Mat         coarse_phi_D,coarse_phi_B;
644     Mat         coarse_psi_D,coarse_psi_B;
645     Mat         A_II,A_BB,A_IB,A_BI;
646     MatType     checkmattype=MATSEQAIJ;
647     PetscReal   real_value;
648 
649     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
650     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
651     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
652     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
653     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
654     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
655     if (unsymmetric_check) {
656       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
657       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
658     }
659     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
660 
661     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
662     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
663     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
664     if (unsymmetric_check) {
665       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
666       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
667       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
668       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
669       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
670       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
671       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
672       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
673       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
674       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
675       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
676       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
677     } else {
678       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
679       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
680       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
684       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
685       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
686     }
687     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
688     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
689     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
690     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
691     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
692     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
693     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
694     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
695     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
696     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
697     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
698     for (i=0;i<pcbddc->local_primal_size;i++) {
699       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
700     }
701     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
702     for (i=0;i<pcbddc->local_primal_size;i++) {
703       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
704     }
705     if (unsymmetric_check) {
706       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
707       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
708         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
709       }
710       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
711       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
712         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
713       }
714     }
715     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
716     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
717     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
718     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
719     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
720     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
721     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
722     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
723     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
724     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
725     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
726     if (unsymmetric_check) {
727       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
728       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
729     }
730     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
731     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
732     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
733     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
734   }
735   /* free memory */
736   if (n_vertices) {
737     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
738     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
739     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
740     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
741     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
742   }
743   if (n_constraints) {
744     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
745     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
746     ierr = MatDestroy(&M1);CHKERRQ(ierr);
747     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
748   }
749   ierr = PetscFree(auxindices);CHKERRQ(ierr);
750   /* get back data */
751   *coarse_submat_vals_n = coarse_submat_vals;
752   PetscFunctionReturn(0);
753 }
754 
755 #undef __FUNCT__
756 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
757 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
758 {
759   PC_IS*            pcis = (PC_IS*)(pc->data);
760   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
761   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
762   PetscBool         issbaij,isseqaij;
763   /* manage repeated solves */
764   MatReuse          reuse;
765   PetscErrorCode    ierr;
766 
767   PetscFunctionBegin;
768   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
769     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
770   }
771   /* get mat flags */
772   reuse = MAT_INITIAL_MATRIX;
773   if (pc->setupcalled) {
774     if (pc->flag == SAME_NONZERO_PATTERN) {
775       reuse = MAT_REUSE_MATRIX;
776     } else {
777       reuse = MAT_INITIAL_MATRIX;
778     }
779   }
780   if (reuse == MAT_INITIAL_MATRIX) {
781     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
782     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
783     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
784     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
785     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
786   }
787 
788   /* transform local matrices if needed */
789   if (pcbddc->ChangeOfBasisMatrix) {
790     Mat       *change_mat_all;
791     IS        is_local,is_global;
792     PetscBool sorted;
793     PetscInt  *idxs_perm;
794 
795     ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr);
796     ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
797     ierr = ISDestroy(&is_local);CHKERRQ(ierr);
798     ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
799     if (!sorted) {
800       const PetscInt *idxs;
801       PetscInt *idxs_sorted,i;
802 
803       ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr);
804       ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr);
805       for (i=0;i<pcis->n;i++) {
806         idxs_perm[i] = i;
807       }
808       ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
809       ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr);
810       for (i=0;i<pcis->n;i++) {
811         idxs_sorted[i] = idxs[idxs_perm[i]];
812       }
813       ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
814       ierr = ISDestroy(&is_global);CHKERRQ(ierr);
815       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
816     }
817 
818     /* get change of basis on the whole set of local dofs */
819     ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
820 
821     if (!sorted) {
822       Mat      new_mat;
823       IS       is_perm;
824       PetscInt *idxs,i;
825 
826       ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr);
827       for (i=0;i<pcis->n;i++) {
828         idxs[idxs_perm[i]] = i;
829       }
830       ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
831       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
832       ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
833       ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
834       ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
835       change_mat_all[0] = new_mat;
836       ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
837     }
838 
839     /* check */
840     if (pcbddc->dbg_flag) {
841       Vec       x,x_change;
842       PetscReal error;
843 
844       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
845       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
846       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
847       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
848       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
849       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
850       ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
851       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
852       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
853       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
854       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
855       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
856       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
857       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
858       ierr = VecDestroy(&x);CHKERRQ(ierr);
859       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
860     }
861 
862     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
863     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
864     if (isseqaij) {
865       ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
866     } else {
867       Mat work_mat;
868       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
869       ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
870       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
871     }
872     /*
873     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
874     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
875     */
876     ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
877     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
878   } else {
879     /* without change of basis, the local matrix is unchanged */
880     if (!pcbddc->local_mat) {
881       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
882       pcbddc->local_mat = matis->A;
883     }
884   }
885 
886   /* get submatrices */
887   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
888   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
889   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
890   if (!issbaij) {
891     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
892     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
893   } else {
894     Mat newmat;
895     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
896     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
897     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
898     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
899   }
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
905 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
906 {
907   PC_IS*         pcis = (PC_IS*)(pc->data);
908   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
909   IS             is_aux1,is_aux2;
910   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
911   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
912   PetscInt       vbs,bs;
913   PetscBT        bitmask;
914   PetscErrorCode ierr;
915 
916   PetscFunctionBegin;
917   /*
918     No need to setup local scatters if
919       - primal space is unchanged
920         AND
921       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
922         AND
923       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
924   */
925   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
926     PetscFunctionReturn(0);
927   }
928   /* destroy old objects */
929   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
930   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
931   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
932   /* Set Non-overlapping dimensions */
933   n_B = pcis->n_B; n_D = pcis->n - n_B;
934   n_vertices = pcbddc->n_actual_vertices;
935   /* create auxiliary bitmask */
936   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
937   for (i=0;i<n_vertices;i++) {
938     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
939   }
940 
941   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
942   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
943   for (i=0, n_R=0; i<pcis->n; i++) {
944     if (!PetscBTLookup(bitmask,i)) {
945       idx_R_local[n_R] = i;
946       n_R++;
947     }
948   }
949 
950   /* Block code */
951   vbs = 1;
952   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
953   if (bs>1 && !(n_vertices%bs)) {
954     PetscBool is_blocked = PETSC_TRUE;
955     PetscInt  *vary;
956     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
957     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
958     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
959     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
960     for (i=0; i<n_vertices; i++) {
961       if (vary[i]!=0 && vary[i]!=bs) {
962         is_blocked = PETSC_FALSE;
963         break;
964       }
965     }
966     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
967       vbs = bs;
968       for (i=0;i<n_R/vbs;i++) {
969         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
970       }
971     }
972     ierr = PetscFree(vary);CHKERRQ(ierr);
973   }
974   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
975   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
976 
977   /* print some info if requested */
978   if (pcbddc->dbg_flag) {
979     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
980     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
981     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
982     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
983     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
984     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
985     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
986     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
987   }
988 
989   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
990   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
991   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
992   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
993   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
994   for (i=0; i<n_D; i++) {
995     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
996   }
997   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
998   for (i=0, j=0; i<n_R; i++) {
999     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1000       aux_array1[j++] = i;
1001     }
1002   }
1003   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1004   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1005   for (i=0, j=0; i<n_B; i++) {
1006     if (!PetscBTLookup(bitmask,is_indices[i])) {
1007       aux_array2[j++] = i;
1008     }
1009   }
1010   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1011   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1012   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1013   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1014   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1015 
1016   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1017     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1018     for (i=0, j=0; i<n_R; i++) {
1019       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1020         aux_array1[j++] = i;
1021       }
1022     }
1023     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1024     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1025     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1026   }
1027   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1028   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 
1033 #undef __FUNCT__
1034 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1035 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1036 {
1037   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1038   PC_IS          *pcis = (PC_IS*)pc->data;
1039   PC             pc_temp;
1040   Mat            A_RR;
1041   MatReuse       reuse;
1042   PetscScalar    m_one = -1.0;
1043   PetscReal      value;
1044   PetscInt       n_D,n_R,ibs,mbs;
1045   PetscBool      use_exact,use_exact_reduced,issbaij;
1046   PetscErrorCode ierr;
1047   /* prefixes stuff */
1048   char           dir_prefix[256],neu_prefix[256],str_level[16];
1049   size_t         len;
1050 
1051   PetscFunctionBegin;
1052 
1053   /* compute prefixes */
1054   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1055   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1056   if (!pcbddc->current_level) {
1057     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1058     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1059     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1060     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1061   } else {
1062     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1063     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1064     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1065     len -= 15; /* remove "pc_bddc_coarse_" */
1066     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1067     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1068     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1069     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1070     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1071     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1072     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1073     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1074   }
1075 
1076   /* DIRICHLET PROBLEM */
1077   /* Matrix for Dirichlet problem is pcis->A_II */
1078   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1079   if (!pcbddc->ksp_D) { /* create object if not yet build */
1080     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1081     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1082     /* default */
1083     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1084     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1085     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1086     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1087     if (issbaij) {
1088       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1089     } else {
1090       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1091     }
1092     /* Allow user's customization */
1093     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1094     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1095   }
1096   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1097   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1098   if (!n_D) {
1099     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1100     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1101   }
1102   /* Set Up KSP for Dirichlet problem of BDDC */
1103   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1104   /* set ksp_D into pcis data */
1105   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1106   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1107   pcis->ksp_D = pcbddc->ksp_D;
1108 
1109   /* NEUMANN PROBLEM */
1110   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1111   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1112   if (pcbddc->ksp_R) { /* already created ksp */
1113     PetscInt nn_R;
1114     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1115     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1116     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1117     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1118       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1119       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1120       reuse = MAT_INITIAL_MATRIX;
1121     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1122       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1123         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1124         reuse = MAT_INITIAL_MATRIX;
1125       } else { /* safe to reuse the matrix */
1126         reuse = MAT_REUSE_MATRIX;
1127       }
1128     }
1129     /* last check */
1130     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1131       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1132       reuse = MAT_INITIAL_MATRIX;
1133     }
1134   } else { /* first time, so we need to create the matrix */
1135     reuse = MAT_INITIAL_MATRIX;
1136   }
1137   /* extract A_RR */
1138   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1139   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1140   if (ibs != mbs) {
1141     Mat newmat;
1142     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1143     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1144     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1145   } else {
1146     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1147   }
1148   if (!pcbddc->ksp_R) { /* create object if not present */
1149     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1150     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1151     /* default */
1152     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1153     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1154     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1155     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1156     if (issbaij) {
1157       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1158     } else {
1159       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1160     }
1161     /* Allow user's customization */
1162     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1163     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1164   }
1165   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1166   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1167   if (!n_R) {
1168     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1169     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1170   }
1171   /* Set Up KSP for Neumann problem of BDDC */
1172   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1173 
1174   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1175   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1176     /* Dirichlet */
1177     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1178     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1179     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1180     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1181     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1182     /* need to be adapted? */
1183     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1184     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1185     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1186     /* print info */
1187     if (pcbddc->dbg_flag) {
1188       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1189       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1190       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1191       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1192       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1193       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1194     }
1195     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1196       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1197     }
1198 
1199     /* Neumann */
1200     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1201     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1202     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1203     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1204     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1205     /* need to be adapted? */
1206     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1207     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1208     /* print info */
1209     if (pcbddc->dbg_flag) {
1210       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1211       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1212     }
1213     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1214       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1215     }
1216   }
1217   /* free Neumann problem's matrix */
1218   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNCT__
1223 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1224 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1225 {
1226   PetscErrorCode ierr;
1227   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1228 
1229   PetscFunctionBegin;
1230   if (applytranspose) {
1231     if (pcbddc->local_auxmat1) {
1232       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1233       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1234     }
1235     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1236   } else {
1237     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1238     if (pcbddc->local_auxmat1) {
1239       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1240       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1241     }
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1249 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1250 {
1251   PetscErrorCode ierr;
1252   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1253   PC_IS*            pcis = (PC_IS*)  (pc->data);
1254   const PetscScalar zero = 0.0;
1255 
1256   PetscFunctionBegin;
1257   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1258   if (applytranspose) {
1259     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1260     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1261   } else {
1262     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1263     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1264   }
1265   /* start communications from local primal nodes to rhs of coarse solver */
1266   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1267   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269 
1270   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1271   /* TODO remove null space when doing multilevel */
1272   if (pcbddc->coarse_ksp) {
1273     if (applytranspose) {
1274       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1275     } else {
1276       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1277     }
1278   }
1279   /* start communications from coarse solver solution to local primal nodes */
1280   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1281 
1282   /* Local solution on R nodes */
1283   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1284   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1285   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286   if (pcbddc->switch_static) {
1287     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1288     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1289   }
1290   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1291   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1292   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1293   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1294   if (pcbddc->switch_static) {
1295     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1297   }
1298 
1299   /* complete communications from coarse sol to local primal nodes */
1300   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301 
1302   /* Sum contributions from two levels */
1303   if (applytranspose) {
1304     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1305     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1306   } else {
1307     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1308     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1309   }
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1314 #undef __FUNCT__
1315 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1316 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1317 {
1318   PetscErrorCode ierr;
1319   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1320   PetscScalar    *array,*array2;
1321   Vec            from,to;
1322 
1323   PetscFunctionBegin;
1324   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1325     from = pcbddc->coarse_vec;
1326     to = pcbddc->vec1_P;
1327     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1328       Vec tvec;
1329       PetscInt lsize;
1330       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1331       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1332       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1333       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1334       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1335       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1336       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1337     }
1338   } else { /* from local to global -> put data in coarse right hand side */
1339     from = pcbddc->vec1_P;
1340     to = pcbddc->coarse_vec;
1341   }
1342   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1348 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1349 {
1350   PetscErrorCode ierr;
1351   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1352   PetscScalar    *array,*array2;
1353   Vec            from,to;
1354 
1355   PetscFunctionBegin;
1356   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1357     from = pcbddc->coarse_vec;
1358     to = pcbddc->vec1_P;
1359   } else { /* from local to global -> put data in coarse right hand side */
1360     from = pcbddc->vec1_P;
1361     to = pcbddc->coarse_vec;
1362   }
1363   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1364   if (smode == SCATTER_FORWARD) {
1365     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1366       Vec tvec;
1367       PetscInt lsize;
1368       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1369       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1370       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1371       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1372       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1373       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1374       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1375     }
1376   }
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 /* uncomment for testing purposes */
1381 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1382 #undef __FUNCT__
1383 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1384 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1385 {
1386   PetscErrorCode    ierr;
1387   PC_IS*            pcis = (PC_IS*)(pc->data);
1388   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1389   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1390   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1391   MatType           impMatType=MATSEQAIJ;
1392   /* one and zero */
1393   PetscScalar       one=1.0,zero=0.0;
1394   /* space to store constraints and their local indices */
1395   PetscScalar       *temp_quadrature_constraint;
1396   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1397   /* iterators */
1398   PetscInt          i,j,k,total_counts,temp_start_ptr;
1399   /* stuff to store connected components stored in pcbddc->mat_graph */
1400   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1401   PetscInt          n_ISForFaces,n_ISForEdges;
1402   /* near null space stuff */
1403   MatNullSpace      nearnullsp;
1404   const Vec         *nearnullvecs;
1405   Vec               *localnearnullsp;
1406   PetscBool         nnsp_has_cnst;
1407   PetscInt          nnsp_size;
1408   PetscScalar       *array;
1409   /* BLAS integers */
1410   PetscBLASInt      lwork,lierr;
1411   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1412   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1413   /* LAPACK working arrays for SVD or POD */
1414   PetscBool         skip_lapack;
1415   PetscScalar       *work;
1416   PetscReal         *singular_vals;
1417 #if defined(PETSC_USE_COMPLEX)
1418   PetscReal         *rwork;
1419 #endif
1420 #if defined(PETSC_MISSING_LAPACK_GESVD)
1421   PetscBLASInt      Blas_one_2=1;
1422   PetscScalar       *temp_basis,*correlation_mat;
1423 #else
1424   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1425   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1426 #endif
1427   /* reuse */
1428   PetscInt          olocal_primal_size;
1429   PetscInt          *oprimal_indices_local_idxs;
1430   /* change of basis */
1431   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1432   PetscBool         boolforchange,qr_needed;
1433   PetscBT           touched,change_basis,qr_needed_idx;
1434   /* auxiliary stuff */
1435   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1436   /* some quantities */
1437   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1438   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1439 
1440 
1441   PetscFunctionBegin;
1442   /* Destroy Mat objects computed previously */
1443   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1444   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1445   /* Get index sets for faces, edges and vertices from graph */
1446   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1447     pcbddc->use_vertices = PETSC_TRUE;
1448   }
1449   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1450   /* HACKS (the two following code branches) */
1451   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1452     pcbddc->use_change_of_basis = PETSC_TRUE;
1453     pcbddc->use_change_on_faces = PETSC_FALSE;
1454   }
1455   if (pcbddc->NullSpace) {
1456     /* use_change_of_basis should be consistent among processors */
1457     PetscBool tbool = pcbddc->use_change_of_basis;
1458     ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1459   }
1460   /* print some info */
1461   if (pcbddc->dbg_flag) {
1462     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1463     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1464     i = 0;
1465     if (ISForVertices) {
1466       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1467     }
1468     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1469     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1470     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1471     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1472   }
1473   /* check if near null space is attached to global mat */
1474   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1475   if (nearnullsp) {
1476     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1477     /* remove any stored info */
1478     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1479     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1480     /* store information for BDDC solver reuse */
1481     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1482     pcbddc->onearnullspace = nearnullsp;
1483     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1484     for (i=0;i<nnsp_size;i++) {
1485       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1486     }
1487   } else { /* if near null space is not provided BDDC uses constants by default */
1488     nnsp_size = 0;
1489     nnsp_has_cnst = PETSC_TRUE;
1490   }
1491   /* get max number of constraints on a single cc */
1492   max_constraints = nnsp_size;
1493   if (nnsp_has_cnst) max_constraints++;
1494 
1495   /*
1496        Evaluate maximum storage size needed by the procedure
1497        - temp_indices will contain start index of each constraint stored as follows
1498        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1499        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1500        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1501                                                                                                                                                          */
1502   total_counts = n_ISForFaces+n_ISForEdges;
1503   total_counts *= max_constraints;
1504   n_vertices = 0;
1505   if (ISForVertices) {
1506     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1507   }
1508   total_counts += n_vertices;
1509   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1510   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1511   total_counts = 0;
1512   max_size_of_constraint = 0;
1513   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1514     if (i<n_ISForEdges) {
1515       used_IS = &ISForEdges[i];
1516     } else {
1517       used_IS = &ISForFaces[i-n_ISForEdges];
1518     }
1519     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1520     total_counts += j;
1521     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1522   }
1523   total_counts *= max_constraints;
1524   total_counts += n_vertices;
1525   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1526   /* get local part of global near null space vectors */
1527   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1528   for (k=0;k<nnsp_size;k++) {
1529     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1530     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1532   }
1533 
1534   /* whether or not to skip lapack calls */
1535   skip_lapack = PETSC_TRUE;
1536   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1537 
1538   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1539   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1540     PetscScalar temp_work;
1541 #if defined(PETSC_MISSING_LAPACK_GESVD)
1542     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1543     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1544     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1545     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1546 #if defined(PETSC_USE_COMPLEX)
1547     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1548 #endif
1549     /* now we evaluate the optimal workspace using query with lwork=-1 */
1550     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1551     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1552     lwork = -1;
1553     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1554 #if !defined(PETSC_USE_COMPLEX)
1555     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1556 #else
1557     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1558 #endif
1559     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1560     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1561 #else /* on missing GESVD */
1562     /* SVD */
1563     PetscInt max_n,min_n;
1564     max_n = max_size_of_constraint;
1565     min_n = max_constraints;
1566     if (max_size_of_constraint < max_constraints) {
1567       min_n = max_size_of_constraint;
1568       max_n = max_constraints;
1569     }
1570     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1571 #if defined(PETSC_USE_COMPLEX)
1572     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1573 #endif
1574     /* now we evaluate the optimal workspace using query with lwork=-1 */
1575     lwork = -1;
1576     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1577     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1578     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1579     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1580 #if !defined(PETSC_USE_COMPLEX)
1581     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1582 #else
1583     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1584 #endif
1585     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1586     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1587 #endif /* on missing GESVD */
1588     /* Allocate optimal workspace */
1589     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1590     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1591   }
1592   /* Now we can loop on constraining sets */
1593   total_counts = 0;
1594   temp_indices[0] = 0;
1595   /* vertices */
1596   if (ISForVertices) {
1597     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1598     if (nnsp_has_cnst) { /* consider all vertices */
1599       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1600       for (i=0;i<n_vertices;i++) {
1601         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1602         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1603         total_counts++;
1604       }
1605     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1606       PetscBool used_vertex;
1607       for (i=0;i<n_vertices;i++) {
1608         used_vertex = PETSC_FALSE;
1609         k = 0;
1610         while (!used_vertex && k<nnsp_size) {
1611           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1612           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1613             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1614             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1615             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1616             total_counts++;
1617             used_vertex = PETSC_TRUE;
1618           }
1619           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1620           k++;
1621         }
1622       }
1623     }
1624     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1625     n_vertices = total_counts;
1626   }
1627 
1628   /* edges and faces */
1629   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1630     if (i<n_ISForEdges) {
1631       used_IS = &ISForEdges[i];
1632       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1633     } else {
1634       used_IS = &ISForFaces[i-n_ISForEdges];
1635       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1636     }
1637     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1638     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1639     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1640     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1641     /* change of basis should not be performed on local periodic nodes */
1642     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1643     if (nnsp_has_cnst) {
1644       PetscScalar quad_value;
1645       temp_constraints++;
1646       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1647       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1648       for (j=0;j<size_of_constraint;j++) {
1649         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1650       }
1651       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1652       total_counts++;
1653     }
1654     for (k=0;k<nnsp_size;k++) {
1655       PetscReal real_value;
1656       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1657       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1658       for (j=0;j<size_of_constraint;j++) {
1659         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1660       }
1661       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1662       /* check if array is null on the connected component */
1663       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1664       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1665       if (real_value > 0.0) { /* keep indices and values */
1666         temp_constraints++;
1667         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1668         total_counts++;
1669       }
1670     }
1671     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1672     valid_constraints = temp_constraints;
1673     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1674     if (!pcbddc->use_nnsp_true && temp_constraints) {
1675       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1676 
1677 #if defined(PETSC_MISSING_LAPACK_GESVD)
1678       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1679          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1680          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1681             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1682             from that computed using LAPACKgesvd
1683          -> This is due to a different computation of eigenvectors in LAPACKheev
1684          -> The quality of the POD-computed basis will be the same */
1685       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1686       /* Store upper triangular part of correlation matrix */
1687       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1688       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1689       for (j=0;j<temp_constraints;j++) {
1690         for (k=0;k<j+1;k++) {
1691           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1692         }
1693       }
1694       /* compute eigenvalues and eigenvectors of correlation matrix */
1695       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1696       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1697 #if !defined(PETSC_USE_COMPLEX)
1698       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1699 #else
1700       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1701 #endif
1702       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1703       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1704       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1705       j=0;
1706       while (j < temp_constraints && singular_vals[j] < tol) j++;
1707       total_counts=total_counts-j;
1708       valid_constraints = temp_constraints-j;
1709       /* scale and copy POD basis into used quadrature memory */
1710       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1711       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1712       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1713       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1714       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1715       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1716       if (j<temp_constraints) {
1717         PetscInt ii;
1718         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1719         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1720         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1721         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1722         for (k=0;k<temp_constraints-j;k++) {
1723           for (ii=0;ii<size_of_constraint;ii++) {
1724             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1725           }
1726         }
1727       }
1728 #else  /* on missing GESVD */
1729       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1730       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1731       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1732       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1733 #if !defined(PETSC_USE_COMPLEX)
1734       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1735 #else
1736       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1737 #endif
1738       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1739       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1740       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1741       k = temp_constraints;
1742       if (k > size_of_constraint) k = size_of_constraint;
1743       j = 0;
1744       while (j < k && singular_vals[k-j-1] < tol) j++;
1745       total_counts = total_counts-temp_constraints+k-j;
1746       valid_constraints = k-j;
1747 #endif /* on missing GESVD */
1748     }
1749     /* setting change_of_basis flag is safe now */
1750     if (boolforchange) {
1751       for (j=0;j<valid_constraints;j++) {
1752         PetscBTSet(change_basis,total_counts-j-1);
1753       }
1754     }
1755   }
1756   /* free index sets of faces, edges and vertices */
1757   for (i=0;i<n_ISForFaces;i++) {
1758     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1759   }
1760   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1761   for (i=0;i<n_ISForEdges;i++) {
1762     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1763   }
1764   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1765   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1766   /* map temp_indices_to_constraint in boundary numbering */
1767   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1768   if (i != temp_indices[total_counts]) {
1769     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1770   }
1771 
1772   /* free workspace */
1773   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1774     ierr = PetscFree(work);CHKERRQ(ierr);
1775 #if defined(PETSC_USE_COMPLEX)
1776     ierr = PetscFree(rwork);CHKERRQ(ierr);
1777 #endif
1778     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1779 #if defined(PETSC_MISSING_LAPACK_GESVD)
1780     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1781     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1782 #endif
1783   }
1784   for (k=0;k<nnsp_size;k++) {
1785     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1786   }
1787   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1788 
1789   /* set quantities in pcbddc data structure and store previous primal size */
1790   /* n_vertices defines the number of subdomain corners in the primal space */
1791   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1792   olocal_primal_size = pcbddc->local_primal_size;
1793   pcbddc->local_primal_size = total_counts;
1794   pcbddc->n_vertices = n_vertices;
1795   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1796 
1797   /* Create constraint matrix */
1798   /* The constraint matrix is used to compute the l2g map of primal dofs */
1799   /* so we need to set it up properly either with or without change of basis */
1800   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1801   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1802   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1803   /* array to compute a local numbering of constraints : vertices first then constraints */
1804   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1805   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1806   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1807   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1808   /* auxiliary stuff for basis change */
1809   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1810   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1811 
1812   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1813   total_primal_vertices=0;
1814   for (i=0;i<pcbddc->local_primal_size;i++) {
1815     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1816     if (size_of_constraint == 1) {
1817       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1818       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1819       aux_primal_minloc[total_primal_vertices]=0;
1820       total_primal_vertices++;
1821     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1822       PetscInt min_loc,min_index;
1823       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1824       /* find first untouched local node */
1825       k = 0;
1826       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1827       min_index = global_indices[k];
1828       min_loc = k;
1829       /* search the minimum among global nodes already untouched on the cc */
1830       for (k=1;k<size_of_constraint;k++) {
1831         /* there can be more than one constraint on a single connected component */
1832         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1833           min_index = global_indices[k];
1834           min_loc = k;
1835         }
1836       }
1837       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1838       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1839       aux_primal_minloc[total_primal_vertices]=min_loc;
1840       total_primal_vertices++;
1841     }
1842   }
1843   /* determine if a QR strategy is needed for change of basis */
1844   qr_needed = PETSC_FALSE;
1845   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1846   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1847     if (PetscBTLookup(change_basis,i)) {
1848       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1849       j = 0;
1850       for (k=0;k<size_of_constraint;k++) {
1851         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1852           j++;
1853         }
1854       }
1855       /* found more than one primal dof on the cc */
1856       if (j > 1) {
1857         PetscBTSet(qr_needed_idx,i);
1858         qr_needed = PETSC_TRUE;
1859       }
1860     }
1861   }
1862   /* free workspace */
1863   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1864 
1865   /* permute indices in order to have a sorted set of vertices */
1866   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1867 
1868   /* nonzero structure of constraint matrix */
1869   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1870   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1871   j=total_primal_vertices;
1872   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1873     if (!PetscBTLookup(change_basis,i)) {
1874       nnz[j]=temp_indices[i+1]-temp_indices[i];
1875       j++;
1876     }
1877   }
1878   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1879   ierr = PetscFree(nnz);CHKERRQ(ierr);
1880   /* set values in constraint matrix */
1881   for (i=0;i<total_primal_vertices;i++) {
1882     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1883   }
1884   total_counts = total_primal_vertices;
1885   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1886     if (!PetscBTLookup(change_basis,i)) {
1887       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1888       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1889       total_counts++;
1890     }
1891   }
1892   /* assembling */
1893   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1894   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1895   /*
1896   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1897   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1898   */
1899   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1900   if (pcbddc->use_change_of_basis) {
1901     /* dual and primal dofs on a single cc */
1902     PetscInt     dual_dofs,primal_dofs;
1903     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1904     PetscInt     primal_counter;
1905     /* working stuff for GEQRF */
1906     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1907     PetscBLASInt lqr_work;
1908     /* working stuff for UNGQR */
1909     PetscScalar  *gqr_work,lgqr_work_t;
1910     PetscBLASInt lgqr_work;
1911     /* working stuff for TRTRS */
1912     PetscScalar  *trs_rhs;
1913     PetscBLASInt Blas_NRHS;
1914     /* pointers for values insertion into change of basis matrix */
1915     PetscInt     *start_rows,*start_cols;
1916     PetscScalar  *start_vals;
1917     /* working stuff for values insertion */
1918     PetscBT      is_primal;
1919     /* matrix sizes */
1920     PetscInt     global_size,local_size;
1921     /* work array for nonzeros */
1922     PetscScalar  *nnz_array;
1923     /* temporary change of basis */
1924     Mat          localChangeOfBasisMatrix;
1925     /* auxiliary work for global change of basis */
1926     Vec          nnz_vec;
1927     PetscInt     *idxs_I,*idxs_B,*idxs_all;
1928     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
1929     PetscScalar  *vals;
1930     PetscBool    done;
1931 
1932     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
1933     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
1934     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1935     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1936 
1937     /* nonzeros for local mat */
1938     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1939     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1940     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1941       if (PetscBTLookup(change_basis,i)) {
1942         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1943         if (PetscBTLookup(qr_needed_idx,i)) {
1944           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1945         } else {
1946           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1947           /* get local primal index on the cc */
1948           j = 0;
1949           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1950           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1951         }
1952       }
1953     }
1954     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1955     /* Set initial identity in the matrix */
1956     for (i=0;i<pcis->n_B;i++) {
1957       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1958     }
1959 
1960     if (pcbddc->dbg_flag) {
1961       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1962       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1963     }
1964 
1965 
1966     /* Now we loop on the constraints which need a change of basis */
1967     /*
1968        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1969        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1970 
1971        Basic blocks of change of basis matrix T computed
1972 
1973           - Using the following block transformation if there is only a primal dof on the cc
1974             (in the example, primal dof is the last one of the edge in LOCAL ordering
1975              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1976             | 1        0   ...        0     1 |
1977             | 0        1   ...        0     1 |
1978             |              ...                |
1979             | 0        ...            1     1 |
1980             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1981 
1982           - via QR decomposition of constraints otherwise
1983     */
1984     if (qr_needed) {
1985       /* space to store Q */
1986       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
1987       /* first we issue queries for optimal work */
1988       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1989       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1990       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1991       lqr_work = -1;
1992       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1993       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1994       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1995       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
1996       lgqr_work = -1;
1997       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1998       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1999       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2000       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2001       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2002       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2003       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2004       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2005       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2006       /* array to store scaling factors for reflectors */
2007       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2008       /* array to store rhs and solution of triangular solver */
2009       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2010       /* allocating workspace for check */
2011       if (pcbddc->dbg_flag) {
2012         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2013       }
2014     }
2015     /* array to store whether a node is primal or not */
2016     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2017     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2018     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2019     if (i != total_primal_vertices) {
2020       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2021     }
2022     for (i=0;i<total_primal_vertices;i++) {
2023       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2024     }
2025     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2026 
2027     /* loop on constraints and see whether or not they need a change of basis and compute it */
2028     /* -> using implicit ordering contained in temp_indices data */
2029     total_counts = pcbddc->n_vertices;
2030     primal_counter = total_counts;
2031     while (total_counts<pcbddc->local_primal_size) {
2032       primal_dofs = 1;
2033       if (PetscBTLookup(change_basis,total_counts)) {
2034         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2035         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2036           primal_dofs++;
2037         }
2038         /* get constraint info */
2039         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2040         dual_dofs = size_of_constraint-primal_dofs;
2041 
2042         if (pcbddc->dbg_flag) {
2043           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2044         }
2045 
2046         if (primal_dofs > 1) { /* QR */
2047 
2048           /* copy quadrature constraints for change of basis check */
2049           if (pcbddc->dbg_flag) {
2050             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2051           }
2052           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2053           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2054 
2055           /* compute QR decomposition of constraints */
2056           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2057           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2058           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2059           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2060           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2061           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2062           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2063 
2064           /* explictly compute R^-T */
2065           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2066           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2067           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2068           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2069           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2070           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2071           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2072           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2073           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2074           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2075 
2076           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2077           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2078           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2079           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2080           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2081           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2082           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2083           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2084           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2085 
2086           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2087              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2088              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2089           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2090           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2091           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2093           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2095           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2096           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2097           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2098           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2099 
2100           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2101           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2102           /* insert cols for primal dofs */
2103           for (j=0;j<primal_dofs;j++) {
2104             start_vals = &qr_basis[j*size_of_constraint];
2105             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2106             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2107           }
2108           /* insert cols for dual dofs */
2109           for (j=0,k=0;j<dual_dofs;k++) {
2110             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2111               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2112               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2113               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2114               j++;
2115             }
2116           }
2117 
2118           /* check change of basis */
2119           if (pcbddc->dbg_flag) {
2120             PetscInt   ii,jj;
2121             PetscBool valid_qr=PETSC_TRUE;
2122             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2123             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2124             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2125             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2126             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2127             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2128             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2129             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2130             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2131             for (jj=0;jj<size_of_constraint;jj++) {
2132               for (ii=0;ii<primal_dofs;ii++) {
2133                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2134                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2135               }
2136             }
2137             if (!valid_qr) {
2138               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2139               for (jj=0;jj<size_of_constraint;jj++) {
2140                 for (ii=0;ii<primal_dofs;ii++) {
2141                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2142                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2143                   }
2144                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2145                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2146                   }
2147                 }
2148               }
2149             } else {
2150               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2151             }
2152           }
2153         } else { /* simple transformation block */
2154           PetscInt row,col;
2155           PetscScalar val;
2156           for (j=0;j<size_of_constraint;j++) {
2157             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2158             if (!PetscBTLookup(is_primal,row)) {
2159               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2160               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2161               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2162             } else {
2163               for (k=0;k<size_of_constraint;k++) {
2164                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2165                 if (row != col) {
2166                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2167                 } else {
2168                   val = 1.0;
2169                 }
2170                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2171               }
2172             }
2173           }
2174           if (pcbddc->dbg_flag) {
2175             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2176           }
2177         }
2178         /* increment primal counter */
2179         primal_counter += primal_dofs;
2180       } else {
2181         if (pcbddc->dbg_flag) {
2182           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2183         }
2184       }
2185       /* increment constraint counter total_counts */
2186       total_counts += primal_dofs;
2187     }
2188 
2189     /* free workspace */
2190     if (qr_needed) {
2191       if (pcbddc->dbg_flag) {
2192         ierr = PetscFree(work);CHKERRQ(ierr);
2193       }
2194       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2195       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2196       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2197       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2198       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2199     }
2200     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2201     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2202     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2203 
2204     /* assembling of global change of variable */
2205     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2206     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2207     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2208     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2209     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2210     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2211 
2212     /* nonzeros (overestimated) */
2213     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2214     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2215     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2216     for (i=0;i<pcis->n;i++) {
2217       nnz_array[i] = 1.0;
2218       idxs_all[i] = i;
2219     }
2220     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2221     for (i=0;i<pcis->n_B;i++) {
2222       nnz_array[idxs_B[i]] = nnz[i];
2223     }
2224     ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2225     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2226     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2227     ierr = PetscFree(nnz);CHKERRQ(ierr);
2228     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2229     ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr);
2230     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2231     for (i=0;i<local_size;i++) {
2232       nnz[i] = (PetscInt)(PetscRealPart(nnz_array[i]));
2233     }
2234     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2235     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2236     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz,0,nnz);CHKERRQ(ierr);
2237     ierr = PetscFree(nnz);CHKERRQ(ierr);
2238 
2239     /* Set identity on dirichlet dofs */
2240     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2241     for (i=0;i<pcis->n-pcis->n_B;i++) {
2242       PetscScalar one=1.0;
2243       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2244     }
2245     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2246 
2247     /* Set values at interface dofs */
2248     done = PETSC_TRUE;
2249     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2250     if (!done) {
2251       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2252     }
2253     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2254     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2255     ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2256     for (i=0;i<nvtxs;i++) {
2257       PetscInt    row,*cols,ncols;
2258       PetscScalar *mat_vals;
2259 
2260       row = idxs_B[i];
2261       ncols = xadj[i+1]-xadj[i];
2262       cols = idxs_mapped+xadj[i];
2263       mat_vals = vals+xadj[i];
2264       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2265     }
2266     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2267     if (!done) {
2268       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2269     }
2270     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2271     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2272     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2273     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2275 
2276     /* check */
2277     if (pcbddc->dbg_flag) {
2278       PetscReal error;
2279       Vec       x,x_change;
2280 
2281       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2282       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2283       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2284       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2285       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2286       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2287       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2288       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2289       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2290       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2291       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2292       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2293       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2294       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2295       ierr = VecDestroy(&x);CHKERRQ(ierr);
2296       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2297     }
2298     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2299 
2300     /*
2301     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2302     ierr = MatView(local_ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2303     */
2304   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2305     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2306     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2307   }
2308 
2309   /* set up change of basis context */
2310   if (pcbddc->ChangeOfBasisMatrix) {
2311     PCBDDCChange_ctx change_ctx;
2312 
2313     if (!pcbddc->new_global_mat) {
2314       PetscInt global_size,local_size;
2315 
2316       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2317       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2318       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2319       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2320       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2321       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2322       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2323       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2324       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2325     } else {
2326       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2327       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2328       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2329     }
2330     if (!pcbddc->user_ChangeOfBasisMatrix) {
2331       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2332       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2333     } else {
2334       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2335       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2336     }
2337     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2338     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2339     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   }
2342 
2343   /* get indices in local ordering for vertices and constraints */
2344   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2345     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2346     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2347   }
2348   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2349   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2350   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2351   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2352   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2353   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2354   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2355   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2356   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2357   /* set quantities in PCBDDC data struct */
2358   pcbddc->n_actual_vertices = i;
2359   /* check if a new primal space has been introduced */
2360   pcbddc->new_primal_space_local = PETSC_TRUE;
2361   if (olocal_primal_size == pcbddc->local_primal_size) {
2362     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2363     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2364     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2365   }
2366   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2367   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2368 
2369   /* flush dbg viewer */
2370   if (pcbddc->dbg_flag) {
2371     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2372   }
2373 
2374   /* free workspace */
2375   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2376   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2377   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2378   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2379   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2380   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2386 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2387 {
2388   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2389   PC_IS       *pcis = (PC_IS*)pc->data;
2390   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2391   PetscInt    ierr,i,vertex_size;
2392   PetscViewer viewer=pcbddc->dbg_viewer;
2393 
2394   PetscFunctionBegin;
2395   /* Reset previously computed graph */
2396   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2397   /* Init local Graph struct */
2398   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2399 
2400   /* Check validity of the csr graph passed in by the user */
2401   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2402     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2403   }
2404 
2405   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2406   if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2407     Mat mat_adj;
2408     const PetscInt *xadj,*adjncy;
2409     PetscBool flg_row=PETSC_TRUE;
2410 
2411     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2412     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2413     if (!flg_row) {
2414       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2415     }
2416     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2417     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2418     if (!flg_row) {
2419       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2420     }
2421     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2422   }
2423 
2424   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2425   vertex_size = 1;
2426   if (pcbddc->user_provided_isfordofs) {
2427     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2428       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2429       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2430         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2431         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2432       }
2433       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2434       pcbddc->n_ISForDofs = 0;
2435       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2436     }
2437     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2438     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2439   } else {
2440     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2441       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2442       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2443       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2444         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2445       }
2446     }
2447   }
2448 
2449   /* Setup of Graph */
2450   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2451     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2452   }
2453   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2454     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2455   }
2456   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2457 
2458   /* Graph's connected components analysis */
2459   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2460 
2461   /* print some info to stdout */
2462   if (pcbddc->dbg_flag) {
2463     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2464   }
2465 
2466   /* mark topography has done */
2467   pcbddc->recompute_topography = PETSC_FALSE;
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2473 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2474 {
2475   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2476   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2477   PetscErrorCode ierr;
2478 
2479   PetscFunctionBegin;
2480   n = 0;
2481   vertices = 0;
2482   if (pcbddc->ConstraintMatrix) {
2483     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2484     for (i=0;i<local_primal_size;i++) {
2485       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2486       if (size_of_constraint == 1) n++;
2487       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2488     }
2489     if (vertices_idx) {
2490       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2491       n = 0;
2492       for (i=0;i<local_primal_size;i++) {
2493         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2494         if (size_of_constraint == 1) {
2495           vertices[n++]=row_cmat_indices[0];
2496         }
2497         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2498       }
2499     }
2500   }
2501   *n_vertices = n;
2502   if (vertices_idx) *vertices_idx = vertices;
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2508 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2509 {
2510   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2511   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2512   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2513   PetscBT        touched;
2514   PetscErrorCode ierr;
2515 
2516     /* This function assumes that the number of local constraints per connected component
2517        is not greater than the number of nodes defined for the connected component
2518        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2519   PetscFunctionBegin;
2520   n = 0;
2521   constraints_index = 0;
2522   if (pcbddc->ConstraintMatrix) {
2523     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2524     max_size_of_constraint = 0;
2525     for (i=0;i<local_primal_size;i++) {
2526       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2527       if (size_of_constraint > 1) {
2528         n++;
2529       }
2530       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2531       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2532     }
2533     if (constraints_idx) {
2534       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2535       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2536       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2537       n = 0;
2538       for (i=0;i<local_primal_size;i++) {
2539         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2540         if (size_of_constraint > 1) {
2541           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2542           /* find first untouched local node */
2543           j = 0;
2544           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2545           min_index = row_cmat_global_indices[j];
2546           min_loc = j;
2547           /* search the minimum among nodes not yet touched on the connected component
2548              since there can be more than one constraint on a single cc */
2549           for (j=1;j<size_of_constraint;j++) {
2550             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2551               min_index = row_cmat_global_indices[j];
2552               min_loc = j;
2553             }
2554           }
2555           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2556           constraints_index[n++] = row_cmat_indices[min_loc];
2557         }
2558         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2559       }
2560       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2561       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2562     }
2563   }
2564   *n_constraints = n;
2565   if (constraints_idx) *constraints_idx = constraints_index;
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 /* the next two functions has been adapted from pcis.c */
2570 #undef __FUNCT__
2571 #define __FUNCT__ "PCBDDCApplySchur"
2572 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2573 {
2574   PetscErrorCode ierr;
2575   PC_IS          *pcis = (PC_IS*)(pc->data);
2576 
2577   PetscFunctionBegin;
2578   if (!vec2_B) { vec2_B = v; }
2579   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2580   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2581   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2582   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2583   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2589 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2590 {
2591   PetscErrorCode ierr;
2592   PC_IS          *pcis = (PC_IS*)(pc->data);
2593 
2594   PetscFunctionBegin;
2595   if (!vec2_B) { vec2_B = v; }
2596   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2597   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2598   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2599   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2600   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 #undef __FUNCT__
2605 #define __FUNCT__ "PCBDDCSubsetNumbering"
2606 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2607 {
2608   Vec            local_vec,global_vec;
2609   IS             seqis,paris;
2610   VecScatter     scatter_ctx;
2611   PetscScalar    *array;
2612   PetscInt       *temp_global_dofs;
2613   PetscScalar    globalsum;
2614   PetscInt       i,j,s;
2615   PetscInt       nlocals,first_index,old_index,max_local;
2616   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2617   PetscMPIInt    *dof_sizes,*dof_displs;
2618   PetscBool      first_found;
2619   PetscErrorCode ierr;
2620 
2621   PetscFunctionBegin;
2622   /* mpi buffers */
2623   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2624   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2625   j = ( !rank_prec_comm ? size_prec_comm : 0);
2626   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2627   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2628   /* get maximum size of subset */
2629   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2630   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2631   max_local = 0;
2632   for (i=0;i<n_local_dofs;i++) {
2633     if (max_local < temp_global_dofs[i] ) {
2634       max_local = temp_global_dofs[i];
2635     }
2636   }
2637   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2638   max_global++;
2639   max_local = 0;
2640   for (i=0;i<n_local_dofs;i++) {
2641     if (max_local < local_dofs[i] ) {
2642       max_local = local_dofs[i];
2643     }
2644   }
2645   max_local++;
2646   /* allocate workspace */
2647   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2648   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2649   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2650   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2651   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2652   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2653   /* create scatter */
2654   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2655   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2656   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2657   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2658   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2659   /* init array */
2660   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2661   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2662   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2663   if (local_dofs_mult) {
2664     for (i=0;i<n_local_dofs;i++) {
2665       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2666     }
2667   } else {
2668     for (i=0;i<n_local_dofs;i++) {
2669       array[local_dofs[i]]=1.0;
2670     }
2671   }
2672   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2673   /* scatter into global vec and get total number of global dofs */
2674   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2675   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2676   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2677   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2678   /* Fill global_vec with cumulative function for global numbering */
2679   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2680   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2681   nlocals = 0;
2682   first_index = -1;
2683   first_found = PETSC_FALSE;
2684   for (i=0;i<s;i++) {
2685     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2686       first_found = PETSC_TRUE;
2687       first_index = i;
2688     }
2689     nlocals += (PetscInt)PetscRealPart(array[i]);
2690   }
2691   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2692   if (!rank_prec_comm) {
2693     dof_displs[0]=0;
2694     for (i=1;i<size_prec_comm;i++) {
2695       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2696     }
2697   }
2698   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2699   if (first_found) {
2700     array[first_index] += (PetscScalar)nlocals;
2701     old_index = first_index;
2702     for (i=first_index+1;i<s;i++) {
2703       if (PetscRealPart(array[i]) > 0.1) {
2704         array[i] += array[old_index];
2705         old_index = i;
2706       }
2707     }
2708   }
2709   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2710   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2711   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2712   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2713   /* get global ordering of local dofs */
2714   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2715   if (local_dofs_mult) {
2716     for (i=0;i<n_local_dofs;i++) {
2717       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2718     }
2719   } else {
2720     for (i=0;i<n_local_dofs;i++) {
2721       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2722     }
2723   }
2724   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2725   /* free workspace */
2726   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2727   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2728   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2729   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2730   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2731   /* return pointer to global ordering of local dofs */
2732   *global_numbering_subset = temp_global_dofs;
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2738 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2739 {
2740   PetscInt       i,j;
2741   PetscScalar    *alphas;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   /* this implements stabilized Gram-Schmidt */
2746   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2747   for (i=0;i<n;i++) {
2748     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2749     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2750     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2751   }
2752   ierr = PetscFree(alphas);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 /* TODO
2757    - now preallocation is done assuming SEQDENSE local matrices
2758 */
2759 #undef __FUNCT__
2760 #define __FUNCT__ "MatISGetMPIXAIJ"
2761 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2762 {
2763   Mat                    new_mat;
2764   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2765   /* info on mat */
2766   /* ISLocalToGlobalMapping rmapping,cmapping; */
2767   PetscInt               bs,rows,cols;
2768   PetscInt               lrows,lcols;
2769   PetscInt               local_rows,local_cols;
2770   PetscBool              isdense;
2771   /* values insertion */
2772   PetscScalar            *array;
2773   PetscInt               *local_indices,*global_indices;
2774   /* work */
2775   PetscInt               i,j,index_row;
2776   PetscErrorCode         ierr;
2777 
2778   PetscFunctionBegin;
2779   /* MISSING CHECKS
2780     - rectangular case not covered (it is not allowed by MATIS)
2781   */
2782   /* get info from mat */
2783   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2784   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2785   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2786   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2787 
2788   /* work */
2789   ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr);
2790   for (i=0;i<local_rows;i++) local_indices[i]=i;
2791   /* map indices of local mat to global values */
2792   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2793   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2794   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2795 
2796   if (reuse==MAT_INITIAL_MATRIX) {
2797     Vec         vec_dnz,vec_onz;
2798     PetscScalar *my_dnz,*my_onz;
2799     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2800     PetscInt    index_col,owner;
2801     PetscMPIInt nsubdomains;
2802 
2803     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2804     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2805     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2806     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2807     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2808     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2809     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2810 
2811     /*
2812       preallocation
2813     */
2814 
2815     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2816     /*
2817        Some vectors are needed to sum up properly on shared interface dofs.
2818        Preallocation macros cannot do the job.
2819        Note that preallocation is not exact, since it overestimates nonzeros
2820     */
2821     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2822     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2823     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2824     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2825     /* All processes need to compute entire row ownership */
2826     ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr);
2827     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2828     for (i=0;i<nsubdomains;i++) {
2829       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2830         row_ownership[j]=i;
2831       }
2832     }
2833 
2834     /*
2835        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2836        then, they will be summed up properly. This way, preallocation is always sufficient
2837     */
2838     ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr);
2839     ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr);
2840     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2841     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2842     for (i=0;i<local_rows;i++) {
2843       index_row = global_indices[i];
2844       for (j=i;j<local_rows;j++) {
2845         owner = row_ownership[index_row];
2846         index_col = global_indices[j];
2847         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2848           my_dnz[i] += 1.0;
2849         } else { /* offdiag block */
2850           my_onz[i] += 1.0;
2851         }
2852         /* same as before, interchanging rows and cols */
2853         if (i != j) {
2854           owner = row_ownership[index_col];
2855           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2856             my_dnz[j] += 1.0;
2857           } else {
2858             my_onz[j] += 1.0;
2859           }
2860         }
2861       }
2862     }
2863     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2864     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2865     if (local_rows) { /* multilevel guard */
2866       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2867       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2868     }
2869     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2870     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2871     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2872     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2873     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2874     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2875     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2876 
2877     /* set computed preallocation in dnz and onz */
2878     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2879     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2880     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2881     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2882     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2883     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2884     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2885     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2886 
2887     /* Resize preallocation if overestimated */
2888     for (i=0;i<lrows;i++) {
2889       dnz[i] = PetscMin(dnz[i],lcols);
2890       onz[i] = PetscMin(onz[i],cols-lcols);
2891     }
2892     /* set preallocation */
2893     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2894     for (i=0;i<lrows/bs;i++) {
2895       dnz[i] = dnz[i*bs]/bs;
2896       onz[i] = onz[i*bs]/bs;
2897     }
2898     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2899     for (i=0;i<lrows/bs;i++) {
2900       dnz[i] = dnz[i]-i;
2901     }
2902     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2903     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2904     *M = new_mat;
2905   } else {
2906     PetscInt mbs,mrows,mcols;
2907     /* some checks */
2908     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2909     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2910     if (mrows != rows) {
2911       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2912     }
2913     if (mrows != rows) {
2914       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2915     }
2916     if (mbs != bs) {
2917       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2918     }
2919     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2920   }
2921   /* set local to global mappings */
2922   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2923   /* Set values */
2924   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2925   if (isdense) { /* special case for dense local matrices */
2926     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2927     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2928     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2929     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2930     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2931     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2932   } else { /* very basic values insertion for all other matrix types */
2933     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2934     for (i=0;i<local_rows;i++) {
2935       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2936       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2937       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2938       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2939       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2940       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2941     }
2942     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2943   }
2944   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2946   if (isdense) {
2947     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2948   }
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatISGetSubassemblingPattern"
2954 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2955 {
2956   Mat             subdomain_adj;
2957   IS              new_ranks,ranks_send_to;
2958   MatPartitioning partitioner;
2959   Mat_IS          *matis;
2960   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2961   PetscInt        prank;
2962   PetscMPIInt     size,rank,color;
2963   PetscInt        *xadj,*adjncy,*oldranks;
2964   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2965   PetscInt        i,j,local_size,threshold=0;
2966   PetscErrorCode  ierr;
2967   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2968   PetscSubcomm    subcomm;
2969 
2970   PetscFunctionBegin;
2971   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2972   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2973   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2974 
2975   /* Get info on mapping */
2976   matis = (Mat_IS*)(mat->data);
2977   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2978   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2979 
2980   /* build local CSR graph of subdomains' connectivity */
2981   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2982   xadj[0] = 0;
2983   xadj[1] = PetscMax(n_neighs-1,0);
2984   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2985   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2986 
2987   if (threshold) {
2988     PetscInt* count,min_threshold;
2989     ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr);
2990     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2991     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2992       for (j=0;j<n_shared[i];j++) {
2993         count[shared[i][j]] += 1;
2994       }
2995     }
2996     /* adapt threshold since we dont want to lose significant connections */
2997     min_threshold = n_neighs;
2998     for (i=1;i<n_neighs;i++) {
2999       for (j=0;j<n_shared[i];j++) {
3000         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
3001       }
3002     }
3003     threshold = PetscMax(min_threshold+1,threshold);
3004     xadj[1] = 0;
3005     for (i=1;i<n_neighs;i++) {
3006       for (j=0;j<n_shared[i];j++) {
3007         if (count[shared[i][j]] < threshold) {
3008           adjncy[xadj[1]] = neighs[i];
3009           adjncy_wgt[xadj[1]] = n_shared[i];
3010           xadj[1]++;
3011           break;
3012         }
3013       }
3014     }
3015     ierr = PetscFree(count);CHKERRQ(ierr);
3016   } else {
3017     if (xadj[1]) {
3018       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3019       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3020     }
3021   }
3022   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3023   if (use_square) {
3024     for (i=0;i<xadj[1];i++) {
3025       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3026     }
3027   }
3028   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3029 
3030   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
3031 
3032   /*
3033     Restrict work on active processes only.
3034   */
3035   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3036   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3037   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3038   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3039   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3040   if (color) {
3041     ierr = PetscFree(xadj);CHKERRQ(ierr);
3042     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3043     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3044   } else {
3045     PetscInt coarsening_ratio;
3046     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3047     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3048     prank = rank;
3049     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3050     /*
3051     for (i=0;i<size;i++) {
3052       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3053     }
3054     */
3055     for (i=0;i<xadj[1];i++) {
3056       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3057     }
3058     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3059     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3060     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3061 
3062     /* Partition */
3063     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3064     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3065     if (use_vwgt) {
3066       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
3067       v_wgt[0] = local_size;
3068       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3069     }
3070     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3071     coarsening_ratio = size/n_subdomains;
3072     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
3073     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3074     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3075     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3076     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3077 
3078     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3079     if (contiguous) {
3080       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3081     } else {
3082       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3083     }
3084     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3085     /* clean up */
3086     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3087     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3088     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3089     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3090   }
3091   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3092 
3093   /* assemble parallel IS for sends */
3094   i = 1;
3095   if (color) i=0;
3096   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3097 
3098   /* get back IS */
3099   *is_sends = ranks_send_to;
3100   PetscFunctionReturn(0);
3101 }
3102 
3103 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3104 
3105 #undef __FUNCT__
3106 #define __FUNCT__ "MatISSubassemble"
3107 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3108 {
3109   Mat                    local_mat;
3110   Mat_IS                 *matis;
3111   IS                     is_sends_internal;
3112   PetscInt               rows,cols;
3113   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3114   PetscBool              ismatis,isdense,destroy_mat;
3115   ISLocalToGlobalMapping l2gmap;
3116   PetscInt*              l2gmap_indices;
3117   const PetscInt*        is_indices;
3118   MatType                new_local_type;
3119   /* buffers */
3120   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3121   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3122   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3123   /* MPI */
3124   MPI_Comm               comm,comm_n;
3125   PetscSubcomm           subcomm;
3126   PetscMPIInt            n_sends,n_recvs,commsize;
3127   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3128   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3129   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3130   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3131   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3132   PetscErrorCode         ierr;
3133 
3134   PetscFunctionBegin;
3135   /* TODO: add missing checks */
3136   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3137   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3138   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3139   PetscValidLogicalCollectiveInt(mat,nis,7);
3140   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3141   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3142   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3143   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3144   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3145   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3146   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3147   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3148     PetscInt mrows,mcols,mnrows,mncols;
3149     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3150     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3151     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3152     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3153     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3154     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3155   }
3156   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3157   PetscValidLogicalCollectiveInt(mat,bs,0);
3158   /* prepare IS for sending if not provided */
3159   if (!is_sends) {
3160     PetscBool pcontig = PETSC_TRUE;
3161     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3162     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3163   } else {
3164     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3165     is_sends_internal = is_sends;
3166   }
3167 
3168   /* get pointer of MATIS data */
3169   matis = (Mat_IS*)mat->data;
3170 
3171   /* get comm */
3172   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3173 
3174   /* compute number of sends */
3175   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3176   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3177 
3178   /* compute number of receives */
3179   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3180   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3181   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3182   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3183   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3184   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3185   ierr = PetscFree(iflags);CHKERRQ(ierr);
3186 
3187   /* restrict comm if requested */
3188   subcomm = 0;
3189   destroy_mat = PETSC_FALSE;
3190   if (restrict_comm) {
3191     PetscMPIInt color,rank,subcommsize;
3192     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3193     color = 0;
3194     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3195     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3196     subcommsize = commsize - subcommsize;
3197     /* check if reuse has been requested */
3198     if (reuse == MAT_REUSE_MATRIX) {
3199       if (*mat_n) {
3200         PetscMPIInt subcommsize2;
3201         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3202         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3203         comm_n = PetscObjectComm((PetscObject)*mat_n);
3204       } else {
3205         comm_n = PETSC_COMM_SELF;
3206       }
3207     } else { /* MAT_INITIAL_MATRIX */
3208       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3209       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3210       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3211       comm_n = subcomm->comm;
3212     }
3213     /* flag to destroy *mat_n if not significative */
3214     if (color) destroy_mat = PETSC_TRUE;
3215   } else {
3216     comm_n = comm;
3217   }
3218 
3219   /* prepare send/receive buffers */
3220   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3221   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3222   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3223   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3224   if (nis) {
3225     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3226     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3227   }
3228 
3229   /* Get data from local matrices */
3230   if (!isdense) {
3231     /* TODO: See below some guidelines on how to prepare the local buffers */
3232     /*
3233        send_buffer_vals should contain the raw values of the local matrix
3234        send_buffer_idxs should contain:
3235        - MatType_PRIVATE type
3236        - PetscInt        size_of_l2gmap
3237        - PetscInt        global_row_indices[size_of_l2gmap]
3238        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3239     */
3240   } else {
3241     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3242     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3243     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
3244     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3245     send_buffer_idxs[1] = i;
3246     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3247     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3248     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3249     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3250     for (i=0;i<n_sends;i++) {
3251       ilengths_vals[is_indices[i]] = len*len;
3252       ilengths_idxs[is_indices[i]] = len+2;
3253     }
3254   }
3255   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3256   /* additional is (if any) */
3257   if (nis) {
3258     PetscMPIInt psum;
3259     PetscInt j;
3260     for (j=0,psum=0;j<nis;j++) {
3261       PetscInt plen;
3262       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3263       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3264       psum += len+1; /* indices + lenght */
3265     }
3266     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3267     for (j=0,psum=0;j<nis;j++) {
3268       PetscInt plen;
3269       const PetscInt *is_array_idxs;
3270       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3271       send_buffer_idxs_is[psum] = plen;
3272       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3273       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3274       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3275       psum += plen+1; /* indices + lenght */
3276     }
3277     for (i=0;i<n_sends;i++) {
3278       ilengths_idxs_is[is_indices[i]] = psum;
3279     }
3280     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3281   }
3282 
3283   buf_size_idxs = 0;
3284   buf_size_vals = 0;
3285   buf_size_idxs_is = 0;
3286   for (i=0;i<n_recvs;i++) {
3287     buf_size_idxs += (PetscInt)olengths_idxs[i];
3288     buf_size_vals += (PetscInt)olengths_vals[i];
3289     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3290   }
3291   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3292   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3293   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3294 
3295   /* get new tags for clean communications */
3296   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3297   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3298   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3299 
3300   /* allocate for requests */
3301   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3302   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3303   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3304   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3305   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3306   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3307 
3308   /* communications */
3309   ptr_idxs = recv_buffer_idxs;
3310   ptr_vals = recv_buffer_vals;
3311   ptr_idxs_is = recv_buffer_idxs_is;
3312   for (i=0;i<n_recvs;i++) {
3313     source_dest = onodes[i];
3314     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3315     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3316     ptr_idxs += olengths_idxs[i];
3317     ptr_vals += olengths_vals[i];
3318     if (nis) {
3319       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3320       ptr_idxs_is += olengths_idxs_is[i];
3321     }
3322   }
3323   for (i=0;i<n_sends;i++) {
3324     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3325     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3326     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3327     if (nis) {
3328       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3329     }
3330   }
3331   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3332   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3333 
3334   /* assemble new l2g map */
3335   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3336   ptr_idxs = recv_buffer_idxs;
3337   buf_size_idxs = 0;
3338   for (i=0;i<n_recvs;i++) {
3339     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3340     ptr_idxs += olengths_idxs[i];
3341   }
3342   ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr);
3343   ptr_idxs = recv_buffer_idxs;
3344   buf_size_idxs = 0;
3345   for (i=0;i<n_recvs;i++) {
3346     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3347     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3348     ptr_idxs += olengths_idxs[i];
3349   }
3350   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3351   ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3352   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3353 
3354   /* infer new local matrix type from received local matrices type */
3355   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3356   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3357   if (n_recvs) {
3358     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3359     ptr_idxs = recv_buffer_idxs;
3360     for (i=0;i<n_recvs;i++) {
3361       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3362         new_local_type_private = MATAIJ_PRIVATE;
3363         break;
3364       }
3365       ptr_idxs += olengths_idxs[i];
3366     }
3367     switch (new_local_type_private) {
3368       case MATDENSE_PRIVATE:
3369         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3370           new_local_type = MATSEQAIJ;
3371           bs = 1;
3372         } else { /* if I receive only 1 dense matrix */
3373           new_local_type = MATSEQDENSE;
3374           bs = 1;
3375         }
3376         break;
3377       case MATAIJ_PRIVATE:
3378         new_local_type = MATSEQAIJ;
3379         bs = 1;
3380         break;
3381       case MATBAIJ_PRIVATE:
3382         new_local_type = MATSEQBAIJ;
3383         break;
3384       case MATSBAIJ_PRIVATE:
3385         new_local_type = MATSEQSBAIJ;
3386         break;
3387       default:
3388         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3389         break;
3390     }
3391   } else { /* by default, new_local_type is seqdense */
3392     new_local_type = MATSEQDENSE;
3393     bs = 1;
3394   }
3395 
3396   /* create MATIS object if needed */
3397   if (reuse == MAT_INITIAL_MATRIX) {
3398     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3399     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3400   } else {
3401     /* it also destroys the local matrices */
3402     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3403   }
3404   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3405   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3406   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3407   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3408 
3409   /* set values */
3410   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3411   ptr_vals = recv_buffer_vals;
3412   ptr_idxs = recv_buffer_idxs;
3413   for (i=0;i<n_recvs;i++) {
3414     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3415       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3416       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3417       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3418       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3419       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3420     } else {
3421       /* TODO */
3422     }
3423     ptr_idxs += olengths_idxs[i];
3424     ptr_vals += olengths_vals[i];
3425   }
3426   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3427   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3428   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3429   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3430 
3431 #if 0
3432   if (!restrict_comm) { /* check */
3433     Vec       lvec,rvec;
3434     PetscReal infty_error;
3435 
3436     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3437     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3438     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3439     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3440     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3441     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3442     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3443     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3444     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3445   }
3446 #endif
3447 
3448   /* assemble new additional is (if any) */
3449   if (nis) {
3450     PetscInt **temp_idxs,*count_is,j,psum;
3451 
3452     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3453     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3454     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3455     ptr_idxs = recv_buffer_idxs_is;
3456     psum = 0;
3457     for (i=0;i<n_recvs;i++) {
3458       for (j=0;j<nis;j++) {
3459         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3460         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3461         psum += plen;
3462         ptr_idxs += plen+1; /* shift pointer to received data */
3463       }
3464     }
3465     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3466     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3467     for (i=1;i<nis;i++) {
3468       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3469     }
3470     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3471     ptr_idxs = recv_buffer_idxs_is;
3472     for (i=0;i<n_recvs;i++) {
3473       for (j=0;j<nis;j++) {
3474         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3475         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3476         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3477         ptr_idxs += plen+1; /* shift pointer to received data */
3478       }
3479     }
3480     for (i=0;i<nis;i++) {
3481       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3482       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3483       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3484     }
3485     ierr = PetscFree(count_is);CHKERRQ(ierr);
3486     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3487     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3488   }
3489   /* free workspace */
3490   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3491   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3492   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3493   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3494   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3495   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3496   if (isdense) {
3497     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3498     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3499   } else {
3500     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3501   }
3502   if (nis) {
3503     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3504     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3505   }
3506   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3507   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3508   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3509   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3510   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3511   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3512   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3513   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3514   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3515   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3516   ierr = PetscFree(onodes);CHKERRQ(ierr);
3517   if (nis) {
3518     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3519     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3520     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3521   }
3522   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3523   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3524     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3525     for (i=0;i<nis;i++) {
3526       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3527     }
3528   }
3529   PetscFunctionReturn(0);
3530 }
3531 
3532 /* temporary hack into ksp private data structure */
3533 #include <petsc-private/kspimpl.h>
3534 
3535 #undef __FUNCT__
3536 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3537 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3538 {
3539   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3540   PC_IS                  *pcis = (PC_IS*)pc->data;
3541   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3542   MatNullSpace           CoarseNullSpace=NULL;
3543   ISLocalToGlobalMapping coarse_islg;
3544   IS                     coarse_is,*isarray;
3545   PetscInt               i,im_active=-1,active_procs=-1;
3546   PetscInt               nis,nisdofs,nisneu;
3547   PC                     pc_temp;
3548   PCType                 coarse_pc_type;
3549   KSPType                coarse_ksp_type;
3550   PetscBool              multilevel_requested,multilevel_allowed;
3551   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3552   Mat                    t_coarse_mat_is;
3553   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3554   PetscMPIInt            all_procs;
3555   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3556   PetscBool              compute_vecs = PETSC_FALSE;
3557   PetscErrorCode         ierr;
3558 
3559   PetscFunctionBegin;
3560   /* Assign global numbering to coarse dofs */
3561   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3562     compute_vecs = PETSC_TRUE;
3563     PetscInt ocoarse_size;
3564     ocoarse_size = pcbddc->coarse_size;
3565     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3566     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3567     /* see if we can avoid some work */
3568     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3569       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3570         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3571         coarse_reuse = PETSC_FALSE;
3572       } else { /* we can safely reuse already computed coarse matrix */
3573         coarse_reuse = PETSC_TRUE;
3574       }
3575     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3576       coarse_reuse = PETSC_FALSE;
3577     }
3578     /* reset any subassembling information */
3579     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3580     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3581   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3582     coarse_reuse = PETSC_TRUE;
3583   }
3584 
3585   /* count "active" (i.e. with positive local size) and "void" processes */
3586   im_active = !!(pcis->n);
3587   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3588   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3589   void_procs = all_procs-active_procs;
3590   csin_type_simple = PETSC_TRUE;
3591   if (pcbddc->current_level) {
3592     csin_ml = PETSC_TRUE;
3593     ncoarse_ml = void_procs;
3594     csin_ds = PETSC_TRUE;
3595     ncoarse_ds = void_procs;
3596     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3597   } else {
3598     csin_ml = PETSC_FALSE;
3599     ncoarse_ml = all_procs;
3600     if (void_procs) {
3601       csin_ds = PETSC_TRUE;
3602       ncoarse_ds = void_procs;
3603       csin_type_simple = PETSC_FALSE;
3604     } else {
3605       csin_ds = PETSC_FALSE;
3606       ncoarse_ds = all_procs;
3607     }
3608   }
3609 
3610   /*
3611     test if we can go multilevel: three conditions must be satisfied:
3612     - we have not exceeded the number of levels requested
3613     - we can actually subassemble the active processes
3614     - we can find a suitable number of MPI processes where we can place the subassembled problem
3615   */
3616   multilevel_allowed = PETSC_FALSE;
3617   multilevel_requested = PETSC_FALSE;
3618   if (pcbddc->current_level < pcbddc->max_levels) {
3619     multilevel_requested = PETSC_TRUE;
3620     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3621       multilevel_allowed = PETSC_FALSE;
3622     } else {
3623       multilevel_allowed = PETSC_TRUE;
3624     }
3625   }
3626   /* determine number of process partecipating to coarse solver */
3627   if (multilevel_allowed) {
3628     ncoarse = ncoarse_ml;
3629     csin = csin_ml;
3630   } else {
3631     ncoarse = ncoarse_ds;
3632     csin = csin_ds;
3633   }
3634 
3635   /* creates temporary l2gmap and IS for coarse indexes */
3636   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3637   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3638 
3639   /* creates temporary MATIS object for coarse matrix */
3640   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3641 #if 0
3642   {
3643     PetscViewer viewer;
3644     char filename[256];
3645     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3646     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3647     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3648     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3649     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3650   }
3651 #endif
3652   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3653   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3654   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3656   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3657 
3658   /* compute dofs splitting and neumann boundaries for coarse dofs */
3659   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3660     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3661     const PetscInt         *idxs;
3662     ISLocalToGlobalMapping tmap;
3663 
3664     /* create map between primal indices (in local representative ordering) and local primal numbering */
3665     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3666     /* allocate space for temporary storage */
3667     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3668     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3669     /* allocate for IS array */
3670     nisdofs = pcbddc->n_ISForDofsLocal;
3671     nisneu = !!pcbddc->NeumannBoundariesLocal;
3672     nis = nisdofs + nisneu;
3673     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3674     /* dofs splitting */
3675     for (i=0;i<nisdofs;i++) {
3676       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3677       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3678       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3679       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3680       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3681       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3682       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3683       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3684     }
3685     /* neumann boundaries */
3686     if (pcbddc->NeumannBoundariesLocal) {
3687       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3688       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3689       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3690       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3691       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3692       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3693       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3694       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3695     }
3696     /* free memory */
3697     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3698     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3699     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3700   } else {
3701     nis = 0;
3702     nisdofs = 0;
3703     nisneu = 0;
3704     isarray = NULL;
3705   }
3706   /* destroy no longer needed map */
3707   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3708 
3709   /* restrict on coarse candidates (if needed) */
3710   coarse_mat_is = NULL;
3711   if (csin) {
3712     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3713       PetscInt j,tissize,*nisindices;
3714       PetscInt *coarse_candidates;
3715       const PetscInt* tisindices;
3716       /* get coarse candidates' ranks in pc communicator */
3717       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3718       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3719       for (i=0,j=0;i<all_procs;i++) {
3720         if (!coarse_candidates[i]) {
3721           coarse_candidates[j]=i;
3722           j++;
3723         }
3724       }
3725       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3726       /* get a suitable subassembling pattern */
3727       if (csin_type_simple) {
3728         PetscMPIInt rank;
3729         PetscInt    issize,isidx;
3730         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3731         if (im_active) {
3732           issize = 1;
3733           isidx = (PetscInt)rank;
3734         } else {
3735           issize = 0;
3736           isidx = -1;
3737         }
3738         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3739       } else {
3740         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3741       }
3742       if (pcbddc->dbg_flag) {
3743         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3744         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3745         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3746         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3747         for (i=0;i<j;i++) {
3748           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3749         }
3750         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3751         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3752       }
3753       /* shift the pattern on coarse candidates */
3754       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3755       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3756       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3757       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3758       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3759       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3760       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3761     }
3762     if (pcbddc->dbg_flag) {
3763       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3764       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3765       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3766       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3767     }
3768     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3769     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3770   } else {
3771     if (pcbddc->dbg_flag) {
3772       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3773       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3774       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3775     }
3776     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3777     coarse_mat_is = t_coarse_mat_is;
3778   }
3779 
3780   /* create local to global scatters for coarse problem */
3781   if (compute_vecs) {
3782     PetscInt lrows;
3783     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3784     if (coarse_mat_is) {
3785       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3786     } else {
3787       lrows = 0;
3788     }
3789     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3790     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3791     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3792     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3793     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3794   }
3795   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3796   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3797 
3798   /* set defaults for coarse KSP and PC */
3799   if (multilevel_allowed) {
3800     coarse_ksp_type = KSPRICHARDSON;
3801     coarse_pc_type = PCBDDC;
3802   } else {
3803     coarse_ksp_type = KSPPREONLY;
3804     coarse_pc_type = PCREDUNDANT;
3805   }
3806 
3807   /* print some info if requested */
3808   if (pcbddc->dbg_flag) {
3809     if (!multilevel_allowed) {
3810       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3811       if (multilevel_requested) {
3812         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3813       } else if (pcbddc->max_levels) {
3814         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3815       }
3816       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3817     }
3818   }
3819 
3820   /* create the coarse KSP object only once with defaults */
3821   if (coarse_mat_is) {
3822     MatReuse coarse_mat_reuse;
3823     PetscViewer dbg_viewer;
3824     if (pcbddc->dbg_flag) {
3825       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3826       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3827     }
3828     if (!pcbddc->coarse_ksp) {
3829       char prefix[256],str_level[16];
3830       size_t len;
3831       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3832       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3833       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3834       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3835       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3836       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3837       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3838       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3839       /* prefix */
3840       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3841       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3842       if (!pcbddc->current_level) {
3843         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3844         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3845       } else {
3846         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3847         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3848         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3849         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3850         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3851         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3852       }
3853       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3854       /* allow user customization */
3855       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3856       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3857     }
3858 
3859     /* get some info after set from options */
3860     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3861     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3862     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3863     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3864     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3865       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3866       isbddc = PETSC_FALSE;
3867     }
3868     if (isredundant) {
3869       KSP inner_ksp;
3870       PC inner_pc;
3871       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3872       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3873       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3874     }
3875 
3876     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3877     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3878     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3879     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3880     if (nisdofs) {
3881       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3882       for (i=0;i<nisdofs;i++) {
3883         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3884       }
3885     }
3886     if (nisneu) {
3887       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3888       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3889     }
3890 
3891     /* assemble coarse matrix */
3892     if (coarse_reuse) {
3893       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3894       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3895       coarse_mat_reuse = MAT_REUSE_MATRIX;
3896     } else {
3897       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3898     }
3899     if (isbddc || isnn) {
3900       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3901         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3902         if (pcbddc->dbg_flag) {
3903           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3904           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3905           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3906           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3907         }
3908       }
3909       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3910     } else {
3911       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3912     }
3913     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3914 
3915     /* propagate symmetry info to coarse matrix */
3916     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3917     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3918 
3919     /* set operators */
3920     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3921     if (pcbddc->dbg_flag) {
3922       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3923     }
3924   } else { /* processes non partecipating to coarse solver (if any) */
3925     coarse_mat = 0;
3926   }
3927   ierr = PetscFree(isarray);CHKERRQ(ierr);
3928 #if 0
3929   {
3930     PetscViewer viewer;
3931     char filename[256];
3932     sprintf(filename,"coarse_mat.m");
3933     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3934     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3935     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3936     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3937   }
3938 #endif
3939 
3940   /* Compute coarse null space (special handling by BDDC only) */
3941   if (pcbddc->NullSpace) {
3942     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3943   }
3944 
3945   if (pcbddc->coarse_ksp) {
3946     Vec crhs,csol;
3947     PetscBool ispreonly;
3948     if (CoarseNullSpace) {
3949       if (isbddc) {
3950         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3951       } else {
3952         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3953       }
3954     }
3955     /* setup coarse ksp */
3956     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3957     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3958     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3959     /* hack */
3960     if (!csol) {
3961       ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3962     }
3963     if (!crhs) {
3964       ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3965     }
3966     /* Check coarse problem if in debug mode or if solving with an iterative method */
3967     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3968     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3969       KSP       check_ksp;
3970       KSPType   check_ksp_type;
3971       PC        check_pc;
3972       Vec       check_vec,coarse_vec;
3973       PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3974       PetscInt  its;
3975       PetscBool compute_eigs;
3976       PetscReal *eigs_r,*eigs_c;
3977       PetscInt  neigs;
3978       const char *prefix;
3979 
3980       /* Create ksp object suitable for estimation of extreme eigenvalues */
3981       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3982       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3983       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3984       if (ispreonly) {
3985         check_ksp_type = KSPPREONLY;
3986         compute_eigs = PETSC_FALSE;
3987       } else {
3988         check_ksp_type = KSPGMRES;
3989         compute_eigs = PETSC_TRUE;
3990       }
3991       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3992       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3993       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3994       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3995       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3996       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3997       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3998       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3999       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4000       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4001       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4002       /* create random vec */
4003       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4004       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4005       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4006       if (CoarseNullSpace) {
4007         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4008       }
4009       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4010       /* solve coarse problem */
4011       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4012       if (CoarseNullSpace) {
4013         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4014       }
4015       /* set eigenvalue estimation if preonly has not been requested */
4016       if (compute_eigs) {
4017         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
4018         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
4019         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4020         lambda_max = eigs_r[neigs-1];
4021         lambda_min = eigs_r[0];
4022         if (pcbddc->use_coarse_estimates) {
4023           if (lambda_max>lambda_min) {
4024             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4025             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4026           }
4027         }
4028       }
4029 
4030       /* check coarse problem residual error */
4031       if (pcbddc->dbg_flag) {
4032         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4033         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4034         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4035         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4036         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4037         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4038         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4039         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4040         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4041         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4042         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4043         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4044         if (compute_eigs) {
4045           PetscReal lambda_max_s,lambda_min_s;
4046           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4047           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4048           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4049           for (i=0;i<neigs;i++) {
4050             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4051           }
4052         }
4053         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4054         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4055       }
4056       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4057       if (compute_eigs) {
4058         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4059         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4060       }
4061     }
4062   }
4063   /* print additional info */
4064   if (pcbddc->dbg_flag) {
4065     /* waits until all processes reaches this point */
4066     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4067     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4068     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4069   }
4070 
4071   /* free memory */
4072   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4073   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4074   PetscFunctionReturn(0);
4075 }
4076 
4077 #undef __FUNCT__
4078 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4079 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4080 {
4081   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4082   PC_IS*         pcis = (PC_IS*)pc->data;
4083   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4084   PetscInt       i,coarse_size;
4085   PetscInt       *local_primal_indices;
4086   PetscErrorCode ierr;
4087 
4088   PetscFunctionBegin;
4089   /* Compute global number of coarse dofs */
4090   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4091     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4092   }
4093   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4094 
4095   /* check numbering */
4096   if (pcbddc->dbg_flag) {
4097     PetscScalar coarsesum,*array;
4098     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4099 
4100     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4101     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4102     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4103     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4104     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4105     for (i=0;i<pcbddc->local_primal_size;i++) {
4106       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4107     }
4108     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4109     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4110     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4111     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4112     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4113     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4114     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4115     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4116     for (i=0;i<pcis->n;i++) {
4117       if (array[i] == 1.0) {
4118         set_error = PETSC_TRUE;
4119         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4120       }
4121     }
4122     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4123     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4124     for (i=0;i<pcis->n;i++) {
4125       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4126     }
4127     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4128     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4129     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4130     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4131     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4132     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4133     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4134       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4135       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4136       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4137       for (i=0;i<pcbddc->local_primal_size;i++) {
4138         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4139       }
4140       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4141     }
4142     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4143     if (set_error_reduced) {
4144       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4145     }
4146   }
4147   /* get back data */
4148   *coarse_size_n = coarse_size;
4149   *local_primal_indices_n = local_primal_indices;
4150   PetscFunctionReturn(0);
4151 }
4152 
4153 #undef __FUNCT__
4154 #define __FUNCT__ "PCBDDCGlobalToLocal"
4155 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4156 {
4157   IS             localis_t;
4158   PetscInt       i,lsize,*idxs,n;
4159   PetscScalar    *vals;
4160   PetscErrorCode ierr;
4161 
4162   PetscFunctionBegin;
4163   /* get indices in local ordering exploiting local to global map */
4164   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4165   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
4166   for (i=0;i<lsize;i++) vals[i] = 1.0;
4167   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4168   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4169   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4170   if (idxs) { /* multilevel guard */
4171     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4172   }
4173   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4174   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4175   ierr = PetscFree(vals);CHKERRQ(ierr);
4176   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4177   /* now compute set in local ordering */
4178   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4179   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4180   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4181   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4182   for (i=0,lsize=0;i<n;i++) {
4183     if (PetscRealPart(vals[i]) > 0.5) {
4184       lsize++;
4185     }
4186   }
4187   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
4188   for (i=0,lsize=0;i<n;i++) {
4189     if (PetscRealPart(vals[i]) > 0.5) {
4190       idxs[lsize++] = i;
4191     }
4192   }
4193   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4194   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4195   *localis = localis_t;
4196   PetscFunctionReturn(0);
4197 }
4198 
4199 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4200 #undef __FUNCT__
4201 #define __FUNCT__ "PCBDDCMatMult_Private"
4202 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4203 {
4204   PCBDDCChange_ctx change_ctx;
4205   PetscErrorCode   ierr;
4206 
4207   PetscFunctionBegin;
4208   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4209   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4210   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4211   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4212   PetscFunctionReturn(0);
4213 }
4214 
4215 #undef __FUNCT__
4216 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4217 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4218 {
4219   PCBDDCChange_ctx change_ctx;
4220   PetscErrorCode   ierr;
4221 
4222   PetscFunctionBegin;
4223   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4224   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4225   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4226   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4227   PetscFunctionReturn(0);
4228 }
4229