xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 609885f52624dc498c55aeb0f5f78335ffdea32c)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
111   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
112   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
119 {
120   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121   PC_IS          *pcis = (PC_IS*)pc->data;
122   VecType        impVecType;
123   PetscInt       n_constraints,n_R,old_size;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   if (!pcbddc->ConstraintMatrix) {
128     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
129   }
130   /* get sizes */
131   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
132   n_R = pcis->n-pcbddc->n_actual_vertices;
133   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
134   /* local work vectors (try to avoid unneeded work)*/
135   /* R nodes */
136   old_size = -1;
137   if (pcbddc->vec1_R) {
138     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
139   }
140   if (n_R != old_size) {
141     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
143     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
144     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
145     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
146     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
147   }
148   /* local primal dofs */
149   old_size = -1;
150   if (pcbddc->vec1_P) {
151     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
152   }
153   if (pcbddc->local_primal_size != old_size) {
154     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
157     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
158   }
159   /* local explicit constraints */
160   old_size = -1;
161   if (pcbddc->vec1_C) {
162     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
163   }
164   if (n_constraints && n_constraints != old_size) {
165     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
168     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "PCBDDCSetUpCorrection"
175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176 {
177   PetscErrorCode         ierr;
178   /* pointers to pcis and pcbddc */
179   PC_IS*                 pcis = (PC_IS*)pc->data;
180   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
181   /* submatrices of local problem */
182   Mat                    A_RV,A_VR,A_VV;
183   /* working matrices */
184   Mat                    M1,M2,M3,C_CR;
185   /* working vectors */
186   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
187   /* additional working stuff */
188   IS                     is_aux;
189   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
190   const PetscScalar      *array,*row_cmat_values;
191   const PetscInt         *row_cmat_indices,*idx_R_local;
192   PetscInt               *idx_V_B,*auxindices;
193   PetscInt               n_vertices,n_constraints,size_of_constraint;
194   PetscInt               i,j,n_R,n_D,n_B;
195   PetscBool              unsymmetric_check;
196   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197   MatType                impMatType;
198   /* some shortcuts to scalars */
199   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
200   /* for debugging purposes */
201   PetscReal              *coarsefunctions_errors,*constraints_errors;
202 
203   PetscFunctionBegin;
204   /* get number of vertices (corners plus constraints with change of basis)
205      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
206   n_vertices = pcbddc->n_actual_vertices;
207   n_constraints = pcbddc->local_primal_size-n_vertices;
208   /* Set Non-overlapping dimensions */
209   n_B = pcis->n_B; n_D = pcis->n - n_B;
210   n_R = pcis->n-n_vertices;
211 
212   /* Set types for local objects needed by BDDC precondtioner */
213   impMatType = MATSEQDENSE;
214 
215   /* Allocating some extra storage just to be safe */
216   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
217   for (i=0;i<pcis->n;i++) auxindices[i]=i;
218 
219   /* vertices in boundary numbering */
220   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
221   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
222   if (i != n_vertices) {
223     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
224   }
225 
226   /* Precompute stuffs needed for preprocessing and application of BDDC*/
227   if (n_constraints) {
228     /* see if we can save some allocations */
229     if (pcbddc->local_auxmat2) {
230       PetscInt on_R,on_constraints;
231       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
232       if (on_R != n_R || on_constraints != n_constraints) {
233         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
234         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
235       }
236     }
237     /* work vectors */
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
240     /* auxiliary matrices */
241     if (!pcbddc->local_auxmat2) {
242       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
243       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
244       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
245       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
246     }
247 
248     /* Extract constraints on R nodes: C_{CR}  */
249     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
250     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
251     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
252 
253     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
254     for (i=0;i<n_constraints;i++) {
255       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
256       /* Get row of constraint matrix in R numbering */
257       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
259       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
260       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
261       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
262       /* Solve for row of constraint matrix in R numbering */
263       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
264       /* Set values in local_auxmat2 */
265       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
267       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
268     }
269     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
272 
273     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
274     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
275     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
276     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
277     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
278     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
279     ierr = MatSetUp(M1);CHKERRQ(ierr);
280     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
281     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
282     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
283     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
284     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
285     ierr = MatDestroy(&M2);CHKERRQ(ierr);
286     ierr = MatDestroy(&M3);CHKERRQ(ierr);
287     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
288     if (!pcbddc->local_auxmat1) {
289       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     } else {
291       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
292     }
293   }
294 
295   /* Get submatrices from subdomain matrix */
296   if (n_vertices) {
297     PetscInt ibs,mbs;
298     PetscBool issbaij;
299     Mat newmat;
300 
301     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
302     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
303     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
304     if (ibs != mbs) { /* need to convert to SEQAIJ */
305       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
309       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
310     } else {
311       /* this is safe */
312       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
313       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
314       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
315         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
316         /* which of the two approaches is faster? */
317         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
319         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
320         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
321         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
322       } else {
323         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
324         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
325       }
326     }
327     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
328     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
329     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
330   }
331 
332   /* Matrix of coarse basis functions (local) */
333   if (pcbddc->coarse_phi_B) {
334     PetscInt on_B,on_primal;
335     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
336     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
337       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
338       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
339     }
340   }
341   if (pcbddc->coarse_phi_D) {
342     PetscInt on_D,on_primal;
343     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
344     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
345       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
346       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
347     }
348   }
349   if (!pcbddc->coarse_phi_B) {
350     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
351     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
352     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
353     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
354   }
355   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
356     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
357     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
358     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
359     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
360   }
361 
362   if (pcbddc->dbg_flag) {
363     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
364     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
365     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
366   }
367   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
368   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
371 
372   /* vertices */
373   for (i=0;i<n_vertices;i++) {
374     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
375     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
376     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
377     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
378     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
379     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
380     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
381     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
384     if (n_constraints) {
385       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
386       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
387       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
388     }
389     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
390     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
391 
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
395     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
399     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
400     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
401     if (pcbddc->switch_static || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
411     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
412     if (n_constraints) {
413       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
414       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
416     }
417 
418     /* check */
419     if (pcbddc->dbg_flag) {
420       /* assemble subdomain vector on local nodes */
421       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
422       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
427       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
428       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
429       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
430       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
432       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
433       if (n_constraints) {
434         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
435         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
437       }
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
441       /* check saddle point solution */
442       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
445       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
446       /* shift by the identity matrix */
447       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
448       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
451     }
452   }
453 
454   /* constraints */
455   for (i=0;i<n_constraints;i++) {
456     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
457     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
458     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
459     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
460     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
461     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
462     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
463     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
464     if (n_vertices) {
465       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
466     }
467     /* Set values in coarse basis function and subdomain part of coarse_mat */
468     /* coarse basis functions */
469     j = i+n_vertices; /* don't touch this! */
470     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
471     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
475     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
476     if (pcbddc->switch_static || pcbddc->dbg_flag) {
477       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
481       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
482     }
483     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
484     if (n_vertices) {
485       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
486       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
487       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
488     }
489     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
490     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
492 
493     if (pcbddc->dbg_flag) {
494       /* assemble subdomain vector on nodes */
495       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
496       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
498       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
499       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
500       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
501       /* assemble subdomain vector of lagrange multipliers */
502       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
503       if (n_vertices) {
504         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
505         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
506         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
507       }
508       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
510       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
511       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
513       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
514       /* check saddle point solution */
515       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
517       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
518       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
519       /* shift by the identity matrix */
520       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
521       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
523       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
524     }
525   }
526   /* call assembling routines for local coarse basis */
527   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   if (pcbddc->switch_static || pcbddc->dbg_flag) {
530     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532   }
533 
534   /* compute other basis functions for non-symmetric problems */
535   ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr);
536   if (!pcbddc->issym) {
537     if (!pcbddc->coarse_psi_B) {
538       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
539       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
540       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
541       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
542     }
543     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
544       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
545       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
546       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
547       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
548     }
549     for (i=0;i<pcbddc->local_primal_size;i++) {
550       if (n_constraints) {
551         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
552         for (j=0;j<n_constraints;j++) {
553           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
554         }
555         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
556         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
557       }
558       if (i<n_vertices) {
559         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
560         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
561         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
562         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
563         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
564         if (n_constraints) {
565           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
566         }
567       } else {
568         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
569       }
570       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
571       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
572       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
574       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
576       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
577       if (i<n_vertices) {
578         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
579       }
580       if (pcbddc->switch_static || pcbddc->dbg_flag) {
581         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
583         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
585         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
586       }
587 
588       if (pcbddc->dbg_flag) {
589         /* assemble subdomain vector on nodes */
590         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
591         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
593         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
594         if (i<n_vertices) {
595           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
596         }
597         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
598         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
599         /* assemble subdomain vector of lagrange multipliers */
600         for (j=0;j<pcbddc->local_primal_size;j++) {
601           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
602         }
603         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
604         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
605         /* check saddle point solution */
606         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
608         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
609         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
610         /* shift by the identity matrix */
611         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
612         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
614         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
615       }
616     }
617     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     if (pcbddc->switch_static || pcbddc->dbg_flag) {
620       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622     }
623     unsymmetric_check = PETSC_TRUE;
624   } else { /* take references to already computed coarse basis */
625     unsymmetric_check = PETSC_FALSE;
626     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
627     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
628     if (pcbddc->coarse_phi_D) {
629       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
630       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
631     }
632   }
633   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
634   /* Checking coarse_sub_mat and coarse basis functios */
635   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
637   if (pcbddc->dbg_flag) {
638     Mat         coarse_sub_mat;
639     Mat         AUXMAT,TM1,TM2,TM3,TM4;
640     Mat         coarse_phi_D,coarse_phi_B;
641     Mat         coarse_psi_D,coarse_psi_B;
642     Mat         A_II,A_BB,A_IB,A_BI;
643     MatType     checkmattype=MATSEQAIJ;
644     PetscReal   real_value;
645 
646     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
647     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
648     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
649     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
650     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
651     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
652     if (unsymmetric_check) {
653       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
654       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
655     }
656     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
657 
658     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
659     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
660     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
661     if (unsymmetric_check) {
662       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
663       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
664       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
665       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
666       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
667       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
668       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
669       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
670       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
671       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
672       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
673       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
674     } else {
675       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
676       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
677       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683     }
684     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
685     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
686     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
687     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
688     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
689     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
690     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
691     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
692     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
693     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
694     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
695     for (i=0;i<pcbddc->local_primal_size;i++) {
696       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
697     }
698     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
699     for (i=0;i<pcbddc->local_primal_size;i++) {
700       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
701     }
702     if (unsymmetric_check) {
703       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
704       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
705         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
706       }
707       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
708       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
709         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
710       }
711     }
712     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
713     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
714     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
715     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
716     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
717     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
718     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
719     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
720     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
721     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
722     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
723     if (unsymmetric_check) {
724       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
725       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
726     }
727     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
728     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
729     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
730     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
731   }
732   /* free memory */
733   if (n_vertices) {
734     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
735     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
737     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
738     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
739   }
740   if (n_constraints) {
741     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
742     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
743     ierr = MatDestroy(&M1);CHKERRQ(ierr);
744     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
745   }
746   ierr = PetscFree(auxindices);CHKERRQ(ierr);
747   /* get back data */
748   *coarse_submat_vals_n = coarse_submat_vals;
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNCT__
753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
755 {
756   PC_IS*            pcis = (PC_IS*)(pc->data);
757   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
758   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
759   PetscBool         issbaij,isseqaij;
760   /* manage repeated solves */
761   MatReuse          reuse;
762   PetscErrorCode    ierr;
763 
764   PetscFunctionBegin;
765   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
766     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
767   }
768   /* get mat flags */
769   reuse = MAT_INITIAL_MATRIX;
770   if (pc->setupcalled) {
771     if (pc->flag == SAME_NONZERO_PATTERN) {
772       reuse = MAT_REUSE_MATRIX;
773     } else {
774       reuse = MAT_INITIAL_MATRIX;
775     }
776   }
777   if (reuse == MAT_INITIAL_MATRIX) {
778     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
779     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
780     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
781     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
782     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
783   }
784 
785   /* transform local matrices if needed */
786   if (pcbddc->ChangeOfBasisMatrix) {
787     Mat         change_mat_all;
788     PetscScalar *row_cmat_values;
789     PetscInt    *row_cmat_indices;
790     PetscInt    *nnz,*is_indices,*temp_indices;
791     PetscInt    i,j,k,n_D,n_B;
792 
793     /* Get Non-overlapping dimensions */
794     n_B = pcis->n_B;
795     n_D = pcis->n-n_B;
796 
797     /* compute nonzero structure of change of basis on all local nodes */
798     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
799     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
800     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
801     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
802     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
803     k=1;
804     for (i=0;i<n_B;i++) {
805       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
806       nnz[is_indices[i]]=j;
807       if (k < j) k = j;
808       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
809     }
810     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
811     /* assemble change of basis matrix on the whole set of local dofs */
812     ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr);
813     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
814     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
815     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
816     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
817     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
818     for (i=0;i<n_D;i++) {
819       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
822     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
823     for (i=0;i<n_B;i++) {
824       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
825       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
826       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
827       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
828     }
829     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
831     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
832     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
833     if (isseqaij) {
834       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
835     } else {
836       Mat work_mat;
837       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
838       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
839       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
840     }
841     /*
842     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
843     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
844     */
845     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
846     ierr = PetscFree(nnz);CHKERRQ(ierr);
847     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
848   } else {
849     /* without change of basis, the local matrix is unchanged */
850     if (!pcbddc->local_mat) {
851       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
852       pcbddc->local_mat = matis->A;
853     }
854   }
855 
856   /* get submatrices */
857   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
858   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
859   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
860   if (!issbaij) {
861     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
862     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
863   } else {
864     Mat newmat;
865     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
866     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
867     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
868     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
869   }
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
876 {
877   PC_IS*         pcis = (PC_IS*)(pc->data);
878   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
879   IS             is_aux1,is_aux2;
880   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
881   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
882   PetscInt       vbs,bs;
883   PetscBT        bitmask;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887   /*
888     No need to setup local scatters if
889       - primal space is unchanged
890         AND
891       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
892         AND
893       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
894   */
895   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
896     PetscFunctionReturn(0);
897   }
898   /* destroy old objects */
899   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
900   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
901   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
902   /* Set Non-overlapping dimensions */
903   n_B = pcis->n_B; n_D = pcis->n - n_B;
904   n_vertices = pcbddc->n_actual_vertices;
905   /* create auxiliary bitmask */
906   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
907   for (i=0;i<n_vertices;i++) {
908     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
909   }
910 
911   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
912   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
913   for (i=0, n_R=0; i<pcis->n; i++) {
914     if (!PetscBTLookup(bitmask,i)) {
915       idx_R_local[n_R] = i;
916       n_R++;
917     }
918   }
919 
920   /* Block code */
921   vbs = 1;
922   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
923   if (bs>1 && !(n_vertices%bs)) {
924     PetscBool is_blocked = PETSC_TRUE;
925     PetscInt  *vary;
926     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
927     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
928     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
929     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
930     for (i=0; i<n_vertices; i++) {
931       if (vary[i]!=0 && vary[i]!=bs) {
932         is_blocked = PETSC_FALSE;
933         break;
934       }
935     }
936     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
937       vbs = bs;
938       for (i=0;i<n_R/vbs;i++) {
939         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
940       }
941     }
942     ierr = PetscFree(vary);CHKERRQ(ierr);
943   }
944   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
945   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
946 
947   /* print some info if requested */
948   if (pcbddc->dbg_flag) {
949     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
950     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
951     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
952     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
953     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
954     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
955     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
956     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
957   }
958 
959   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
960   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
961   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
962   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
963   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
964   for (i=0; i<n_D; i++) {
965     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
966   }
967   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   for (i=0, j=0; i<n_R; i++) {
969     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
970       aux_array1[j++] = i;
971     }
972   }
973   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
974   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
975   for (i=0, j=0; i<n_B; i++) {
976     if (!PetscBTLookup(bitmask,is_indices[i])) {
977       aux_array2[j++] = i;
978     }
979   }
980   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
981   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
982   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
983   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
984   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
985 
986   if (pcbddc->switch_static || pcbddc->dbg_flag) {
987     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
988     for (i=0, j=0; i<n_R; i++) {
989       if (PetscBTLookup(bitmask,idx_R_local[i])) {
990         aux_array1[j++] = i;
991       }
992     }
993     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
994     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
995     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
996   }
997   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
998   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1006 {
1007   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1008   PC_IS          *pcis = (PC_IS*)pc->data;
1009   PC             pc_temp;
1010   Mat            A_RR;
1011   MatReuse       reuse;
1012   PetscScalar    m_one = -1.0;
1013   PetscReal      value;
1014   PetscInt       n_D,n_R,ibs,mbs;
1015   PetscBool      use_exact,use_exact_reduced,issbaij;
1016   PetscErrorCode ierr;
1017   /* prefixes stuff */
1018   char           dir_prefix[256],neu_prefix[256],str_level[16];
1019   size_t         len;
1020 
1021   PetscFunctionBegin;
1022 
1023   /* compute prefixes */
1024   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1025   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1026   if (!pcbddc->current_level) {
1027     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1028     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1029     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1030     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1031   } else {
1032     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1033     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1034     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1035     len -= 15; /* remove "pc_bddc_coarse_" */
1036     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1037     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1038     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1039     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1040     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1041     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1042     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1043     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1044   }
1045 
1046   /* DIRICHLET PROBLEM */
1047   /* Matrix for Dirichlet problem is pcis->A_II */
1048   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1049   if (!pcbddc->ksp_D) { /* create object if not yet build */
1050     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1051     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1052     /* default */
1053     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1054     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1055     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1056     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1057     if (issbaij) {
1058       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1059     } else {
1060       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1061     }
1062     /* Allow user's customization */
1063     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1064     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1065   }
1066   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1067   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1068   if (!n_D) {
1069     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1070     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1071   }
1072   /* Set Up KSP for Dirichlet problem of BDDC */
1073   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1074   /* set ksp_D into pcis data */
1075   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1076   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1077   pcis->ksp_D = pcbddc->ksp_D;
1078 
1079   /* NEUMANN PROBLEM */
1080   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1081   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1082   if (pcbddc->ksp_R) { /* already created ksp */
1083     PetscInt nn_R;
1084     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1085     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1086     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1087     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1088       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1089       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1090       reuse = MAT_INITIAL_MATRIX;
1091     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1092       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1093         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1094         reuse = MAT_INITIAL_MATRIX;
1095       } else { /* safe to reuse the matrix */
1096         reuse = MAT_REUSE_MATRIX;
1097       }
1098     }
1099     /* last check */
1100     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1101       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1102       reuse = MAT_INITIAL_MATRIX;
1103     }
1104   } else { /* first time, so we need to create the matrix */
1105     reuse = MAT_INITIAL_MATRIX;
1106   }
1107   /* extract A_RR */
1108   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1109   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1110   if (ibs != mbs) {
1111     Mat newmat;
1112     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1113     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1114     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1115   } else {
1116     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1117   }
1118   if (!pcbddc->ksp_R) { /* create object if not present */
1119     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1120     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1121     /* default */
1122     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1123     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1124     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1125     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1126     if (issbaij) {
1127       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1128     } else {
1129       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1130     }
1131     /* Allow user's customization */
1132     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1133     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1134   }
1135   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1136   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1137   if (!n_R) {
1138     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1139     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1140   }
1141   /* Set Up KSP for Neumann problem of BDDC */
1142   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1143 
1144   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1145   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1146     /* Dirichlet */
1147     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1148     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1149     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1150     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1151     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1152     /* need to be adapted? */
1153     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1154     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1155     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1156     /* print info */
1157     if (pcbddc->dbg_flag) {
1158       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1159       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1160       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1161       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1162       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1163       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1164     }
1165     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1166       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1167     }
1168 
1169     /* Neumann */
1170     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1171     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1172     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1173     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1174     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1175     /* need to be adapted? */
1176     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1177     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1178     /* print info */
1179     if (pcbddc->dbg_flag) {
1180       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1181       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182     }
1183     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1184       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1185     }
1186   }
1187   /* free Neumann problem's matrix */
1188   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1194 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1195 {
1196   PetscErrorCode ierr;
1197   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1198 
1199   PetscFunctionBegin;
1200   if (applytranspose) {
1201     if (pcbddc->local_auxmat1) {
1202       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1203       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1204     }
1205     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1206   } else {
1207     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1208     if (pcbddc->local_auxmat1) {
1209       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1210       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1211     }
1212   }
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1219 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1220 {
1221   PetscErrorCode ierr;
1222   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1223   PC_IS*            pcis = (PC_IS*)  (pc->data);
1224   const PetscScalar zero = 0.0;
1225 
1226   PetscFunctionBegin;
1227   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1228   if (applytranspose) {
1229     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1230     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1231   } else {
1232     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1233     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1234   }
1235   /* start communications from local primal nodes to rhs of coarse solver */
1236   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1237   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1241   /* TODO remove null space when doing multilevel */
1242   if (pcbddc->coarse_ksp) {
1243     if (applytranspose) {
1244       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1245     } else {
1246       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1247     }
1248   }
1249   /* start communications from coarse solver solution to local primal nodes */
1250   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1251 
1252   /* Local solution on R nodes */
1253   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1254   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1256   if (pcbddc->switch_static) {
1257     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1259   }
1260   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1261   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1262   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264   if (pcbddc->switch_static) {
1265     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   }
1268 
1269   /* complete communications from coarse sol to local primal nodes */
1270   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1271 
1272   /* Sum contributions from two levels */
1273   if (applytranspose) {
1274     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1275     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1276   } else {
1277     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1278     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1279   }
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1284 #undef __FUNCT__
1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1287 {
1288   PetscErrorCode ierr;
1289   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1290   PetscScalar    *array,*array2;
1291   Vec            from,to;
1292 
1293   PetscFunctionBegin;
1294   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1295     from = pcbddc->coarse_vec;
1296     to = pcbddc->vec1_P;
1297     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1298       Vec tvec;
1299       PetscInt lsize;
1300       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1301       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1302       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1303       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1304       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1305       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1306       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1307     }
1308   } else { /* from local to global -> put data in coarse right hand side */
1309     from = pcbddc->vec1_P;
1310     to = pcbddc->coarse_vec;
1311   }
1312   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1318 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1319 {
1320   PetscErrorCode ierr;
1321   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1322   PetscScalar    *array,*array2;
1323   Vec            from,to;
1324 
1325   PetscFunctionBegin;
1326   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1327     from = pcbddc->coarse_vec;
1328     to = pcbddc->vec1_P;
1329   } else { /* from local to global -> put data in coarse right hand side */
1330     from = pcbddc->vec1_P;
1331     to = pcbddc->coarse_vec;
1332   }
1333   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1334   if (smode == SCATTER_FORWARD) {
1335     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1336       Vec tvec;
1337       PetscInt lsize;
1338       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1339       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1340       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1341       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1342       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1343       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1344       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1345     }
1346   }
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 /* uncomment for testing purposes */
1351 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1352 #undef __FUNCT__
1353 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1354 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1355 {
1356   PetscErrorCode    ierr;
1357   PC_IS*            pcis = (PC_IS*)(pc->data);
1358   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1359   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1360   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1361   MatType           impMatType=MATSEQAIJ;
1362   /* one and zero */
1363   PetscScalar       one=1.0,zero=0.0;
1364   /* space to store constraints and their local indices */
1365   PetscScalar       *temp_quadrature_constraint;
1366   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1367   /* iterators */
1368   PetscInt          i,j,k,total_counts,temp_start_ptr;
1369   /* stuff to store connected components stored in pcbddc->mat_graph */
1370   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1371   PetscInt          n_ISForFaces,n_ISForEdges;
1372   /* near null space stuff */
1373   MatNullSpace      nearnullsp;
1374   const Vec         *nearnullvecs;
1375   Vec               *localnearnullsp;
1376   PetscBool         nnsp_has_cnst;
1377   PetscInt          nnsp_size;
1378   PetscScalar       *array;
1379   /* BLAS integers */
1380   PetscBLASInt      lwork,lierr;
1381   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1382   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1383   /* LAPACK working arrays for SVD or POD */
1384   PetscBool         skip_lapack;
1385   PetscScalar       *work;
1386   PetscReal         *singular_vals;
1387 #if defined(PETSC_USE_COMPLEX)
1388   PetscReal         *rwork;
1389 #endif
1390 #if defined(PETSC_MISSING_LAPACK_GESVD)
1391   PetscBLASInt      Blas_one_2=1;
1392   PetscScalar       *temp_basis,*correlation_mat;
1393 #else
1394   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1395   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1396 #endif
1397   /* reuse */
1398   PetscInt          olocal_primal_size;
1399   PetscInt          *oprimal_indices_local_idxs;
1400   /* change of basis */
1401   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1402   PetscBool         boolforchange,qr_needed;
1403   PetscBT           touched,change_basis,qr_needed_idx;
1404   /* auxiliary stuff */
1405   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1406   /* some quantities */
1407   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1408   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1409 
1410 
1411   PetscFunctionBegin;
1412   /* Destroy Mat objects computed previously */
1413   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1414   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1415   /* Get index sets for faces, edges and vertices from graph */
1416   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1417   /* free unneeded index sets */
1418   if (!pcbddc->use_vertices) {
1419     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1420   }
1421   if (!pcbddc->use_edges) {
1422     for (i=0;i<n_ISForEdges;i++) {
1423       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1424     }
1425     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1426     n_ISForEdges = 0;
1427   }
1428   if (!pcbddc->use_faces) {
1429     for (i=0;i<n_ISForFaces;i++) {
1430       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1431     }
1432     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1433     n_ISForFaces = 0;
1434   }
1435   /* HACKS (the following two blocks of code) */
1436   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1437     pcbddc->use_change_of_basis = PETSC_TRUE;
1438     if (!ISForEdges) {
1439       pcbddc->use_change_on_faces = PETSC_TRUE;
1440     }
1441   }
1442   if (pcbddc->NullSpace) {
1443     /* use_change_of_basis should be consistent among processors */
1444     PetscBool tbool[2],gbool[2];
1445     tbool [0] = pcbddc->use_change_of_basis;
1446     tbool [1] = pcbddc->use_change_on_faces;
1447     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1448     pcbddc->use_change_of_basis = gbool[0];
1449     pcbddc->use_change_on_faces = gbool[1];
1450   }
1451   /* print some info */
1452   if (pcbddc->dbg_flag) {
1453     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1454     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1455     i = 0;
1456     if (ISForVertices) {
1457       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1458     }
1459     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1460     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1461     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1462     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1463   }
1464   /* check if near null space is attached to global mat */
1465   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1466   if (nearnullsp) {
1467     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1468     /* remove any stored info */
1469     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1470     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1471     /* store information for BDDC solver reuse */
1472     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1473     pcbddc->onearnullspace = nearnullsp;
1474     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1475     for (i=0;i<nnsp_size;i++) {
1476       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1477     }
1478   } else { /* if near null space is not provided BDDC uses constants by default */
1479     nnsp_size = 0;
1480     nnsp_has_cnst = PETSC_TRUE;
1481   }
1482   /* get max number of constraints on a single cc */
1483   max_constraints = nnsp_size;
1484   if (nnsp_has_cnst) max_constraints++;
1485 
1486   /*
1487        Evaluate maximum storage size needed by the procedure
1488        - temp_indices will contain start index of each constraint stored as follows
1489        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1490        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1491        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1492                                                                                                                                                          */
1493   total_counts = n_ISForFaces+n_ISForEdges;
1494   total_counts *= max_constraints;
1495   n_vertices = 0;
1496   if (ISForVertices) {
1497     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1498   }
1499   total_counts += n_vertices;
1500   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1501   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1502   total_counts = 0;
1503   max_size_of_constraint = 0;
1504   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1505     if (i<n_ISForEdges) {
1506       used_IS = &ISForEdges[i];
1507     } else {
1508       used_IS = &ISForFaces[i-n_ISForEdges];
1509     }
1510     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1511     total_counts += j;
1512     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1513   }
1514   total_counts *= max_constraints;
1515   total_counts += n_vertices;
1516   ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr);
1517   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr);
1518   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1519   /* get local part of global near null space vectors */
1520   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1521   for (k=0;k<nnsp_size;k++) {
1522     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1523     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525   }
1526 
1527   /* whether or not to skip lapack calls */
1528   skip_lapack = PETSC_TRUE;
1529   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1530 
1531   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1532   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1533     PetscScalar temp_work;
1534 #if defined(PETSC_MISSING_LAPACK_GESVD)
1535     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1536     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1537     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1538     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1539 #if defined(PETSC_USE_COMPLEX)
1540     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1541 #endif
1542     /* now we evaluate the optimal workspace using query with lwork=-1 */
1543     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1544     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1545     lwork = -1;
1546     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1547 #if !defined(PETSC_USE_COMPLEX)
1548     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1549 #else
1550     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1551 #endif
1552     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1553     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1554 #else /* on missing GESVD */
1555     /* SVD */
1556     PetscInt max_n,min_n;
1557     max_n = max_size_of_constraint;
1558     min_n = max_constraints;
1559     if (max_size_of_constraint < max_constraints) {
1560       min_n = max_size_of_constraint;
1561       max_n = max_constraints;
1562     }
1563     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1564 #if defined(PETSC_USE_COMPLEX)
1565     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1566 #endif
1567     /* now we evaluate the optimal workspace using query with lwork=-1 */
1568     lwork = -1;
1569     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1570     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1571     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1572     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1573 #if !defined(PETSC_USE_COMPLEX)
1574     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1575 #else
1576     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1577 #endif
1578     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1579     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1580 #endif /* on missing GESVD */
1581     /* Allocate optimal workspace */
1582     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1583     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1584   }
1585   /* Now we can loop on constraining sets */
1586   total_counts = 0;
1587   temp_indices[0] = 0;
1588   /* vertices */
1589   if (ISForVertices) {
1590     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1591     if (nnsp_has_cnst) { /* consider all vertices */
1592       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1593       for (i=0;i<n_vertices;i++) {
1594         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1595         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1596         total_counts++;
1597       }
1598     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1599       PetscBool used_vertex;
1600       for (i=0;i<n_vertices;i++) {
1601         used_vertex = PETSC_FALSE;
1602         k = 0;
1603         while (!used_vertex && k<nnsp_size) {
1604           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1605           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1606             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1607             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1608             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1609             total_counts++;
1610             used_vertex = PETSC_TRUE;
1611           }
1612           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1613           k++;
1614         }
1615       }
1616     }
1617     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1618     n_vertices = total_counts;
1619   }
1620 
1621   /* edges and faces */
1622   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1623     if (i<n_ISForEdges) {
1624       used_IS = &ISForEdges[i];
1625       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1626     } else {
1627       used_IS = &ISForFaces[i-n_ISForEdges];
1628       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1629     }
1630     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1631     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1632     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1633     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1634     /* change of basis should not be performed on local periodic nodes */
1635     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1636     if (nnsp_has_cnst) {
1637       PetscScalar quad_value;
1638       temp_constraints++;
1639       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1640       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1641       for (j=0;j<size_of_constraint;j++) {
1642         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1643       }
1644       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1645       total_counts++;
1646     }
1647     for (k=0;k<nnsp_size;k++) {
1648       PetscReal real_value;
1649       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1650       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1651       for (j=0;j<size_of_constraint;j++) {
1652         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1653       }
1654       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1655       /* check if array is null on the connected component */
1656       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1657       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1658       if (real_value > 0.0) { /* keep indices and values */
1659         temp_constraints++;
1660         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1661         total_counts++;
1662       }
1663     }
1664     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1665     valid_constraints = temp_constraints;
1666     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1667     if (!pcbddc->use_nnsp_true && temp_constraints) {
1668       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1669 
1670 #if defined(PETSC_MISSING_LAPACK_GESVD)
1671       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1672          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1673          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1674             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1675             from that computed using LAPACKgesvd
1676          -> This is due to a different computation of eigenvectors in LAPACKheev
1677          -> The quality of the POD-computed basis will be the same */
1678       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1679       /* Store upper triangular part of correlation matrix */
1680       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1681       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1682       for (j=0;j<temp_constraints;j++) {
1683         for (k=0;k<j+1;k++) {
1684           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1685         }
1686       }
1687       /* compute eigenvalues and eigenvectors of correlation matrix */
1688       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1689       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1690 #if !defined(PETSC_USE_COMPLEX)
1691       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1692 #else
1693       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1694 #endif
1695       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1696       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1697       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1698       j=0;
1699       while (j < temp_constraints && singular_vals[j] < tol) j++;
1700       total_counts=total_counts-j;
1701       valid_constraints = temp_constraints-j;
1702       /* scale and copy POD basis into used quadrature memory */
1703       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1704       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1705       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1706       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1707       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1708       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1709       if (j<temp_constraints) {
1710         PetscInt ii;
1711         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1712         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1713         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1714         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1715         for (k=0;k<temp_constraints-j;k++) {
1716           for (ii=0;ii<size_of_constraint;ii++) {
1717             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1718           }
1719         }
1720       }
1721 #else  /* on missing GESVD */
1722       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1723       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1724       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1725       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1726 #if !defined(PETSC_USE_COMPLEX)
1727       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1728 #else
1729       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1730 #endif
1731       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1732       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1733       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1734       k = temp_constraints;
1735       if (k > size_of_constraint) k = size_of_constraint;
1736       j = 0;
1737       while (j < k && singular_vals[k-j-1] < tol) j++;
1738       total_counts = total_counts-temp_constraints+k-j;
1739       valid_constraints = k-j;
1740 #endif /* on missing GESVD */
1741     }
1742     /* setting change_of_basis flag is safe now */
1743     if (boolforchange) {
1744       for (j=0;j<valid_constraints;j++) {
1745         PetscBTSet(change_basis,total_counts-j-1);
1746       }
1747     }
1748   }
1749   /* free index sets of faces, edges and vertices */
1750   for (i=0;i<n_ISForFaces;i++) {
1751     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1752   }
1753   if (n_ISForFaces) {
1754     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1755   }
1756   for (i=0;i<n_ISForEdges;i++) {
1757     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1758   }
1759   if (n_ISForEdges) {
1760     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1761   }
1762   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1763   /* map temp_indices_to_constraint in boundary numbering */
1764   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1765   if (i != temp_indices[total_counts]) {
1766     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1767   }
1768 
1769   /* free workspace */
1770   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1771     ierr = PetscFree(work);CHKERRQ(ierr);
1772 #if defined(PETSC_USE_COMPLEX)
1773     ierr = PetscFree(rwork);CHKERRQ(ierr);
1774 #endif
1775     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1776 #if defined(PETSC_MISSING_LAPACK_GESVD)
1777     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1778     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1779 #endif
1780   }
1781   for (k=0;k<nnsp_size;k++) {
1782     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1783   }
1784   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1785 
1786   /* set quantities in pcbddc data structure and store previous primal size */
1787   /* n_vertices defines the number of subdomain corners in the primal space */
1788   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1789   olocal_primal_size = pcbddc->local_primal_size;
1790   pcbddc->local_primal_size = total_counts;
1791   pcbddc->n_vertices = n_vertices;
1792   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1793 
1794   /* Create constraint matrix */
1795   /* The constraint matrix is used to compute the l2g map of primal dofs */
1796   /* so we need to set it up properly either with or without change of basis */
1797   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1798   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1799   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1800   /* array to compute a local numbering of constraints : vertices first then constraints */
1801   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1802   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1803   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1804   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1805   /* auxiliary stuff for basis change */
1806   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1807   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1808 
1809   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1810   total_primal_vertices=0;
1811   for (i=0;i<pcbddc->local_primal_size;i++) {
1812     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1813     if (size_of_constraint == 1) {
1814       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1815       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1816       aux_primal_minloc[total_primal_vertices]=0;
1817       total_primal_vertices++;
1818     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1819       PetscInt min_loc,min_index;
1820       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1821       /* find first untouched local node */
1822       k = 0;
1823       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1824       min_index = global_indices[k];
1825       min_loc = k;
1826       /* search the minimum among global nodes already untouched on the cc */
1827       for (k=1;k<size_of_constraint;k++) {
1828         /* there can be more than one constraint on a single connected component */
1829         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1830           min_index = global_indices[k];
1831           min_loc = k;
1832         }
1833       }
1834       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1835       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1836       aux_primal_minloc[total_primal_vertices]=min_loc;
1837       total_primal_vertices++;
1838     }
1839   }
1840   /* determine if a QR strategy is needed for change of basis */
1841   qr_needed = PETSC_FALSE;
1842   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1843   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1844     if (PetscBTLookup(change_basis,i)) {
1845       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1846       j = 0;
1847       for (k=0;k<size_of_constraint;k++) {
1848         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1849           j++;
1850         }
1851       }
1852       /* found more than one primal dof on the cc */
1853       if (j > 1) {
1854         PetscBTSet(qr_needed_idx,i);
1855         qr_needed = PETSC_TRUE;
1856       }
1857     }
1858   }
1859   /* free workspace */
1860   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1861 
1862   /* permute indices in order to have a sorted set of vertices */
1863   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1864 
1865   /* nonzero structure of constraint matrix */
1866   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1867   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1868   j=total_primal_vertices;
1869   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1870     if (!PetscBTLookup(change_basis,i)) {
1871       nnz[j]=temp_indices[i+1]-temp_indices[i];
1872       j++;
1873     }
1874   }
1875   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1876   ierr = PetscFree(nnz);CHKERRQ(ierr);
1877   /* set values in constraint matrix */
1878   for (i=0;i<total_primal_vertices;i++) {
1879     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1880   }
1881   total_counts = total_primal_vertices;
1882   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1883     if (!PetscBTLookup(change_basis,i)) {
1884       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1885       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1886       total_counts++;
1887     }
1888   }
1889   /* assembling */
1890   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1891   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1892   /*
1893   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1894   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1895   */
1896   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1897   if (pcbddc->use_change_of_basis) {
1898     /* dual and primal dofs on a single cc */
1899     PetscInt     dual_dofs,primal_dofs;
1900     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1901     PetscInt     primal_counter;
1902     /* working stuff for GEQRF */
1903     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1904     PetscBLASInt lqr_work;
1905     /* working stuff for UNGQR */
1906     PetscScalar  *gqr_work,lgqr_work_t;
1907     PetscBLASInt lgqr_work;
1908     /* working stuff for TRTRS */
1909     PetscScalar  *trs_rhs;
1910     PetscBLASInt Blas_NRHS;
1911     /* pointers for values insertion into change of basis matrix */
1912     PetscInt     *start_rows,*start_cols;
1913     PetscScalar  *start_vals;
1914     /* working stuff for values insertion */
1915     PetscBT      is_primal;
1916 
1917     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1918     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1919     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1920     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1921     /* work arrays */
1922     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1923     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1924     /* nonzeros per row */
1925     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1926       if (PetscBTLookup(change_basis,i)) {
1927         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1928         if (PetscBTLookup(qr_needed_idx,i)) {
1929           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1930         } else {
1931           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1932           /* get local primal index on the cc */
1933           j = 0;
1934           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1935           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1936         }
1937       }
1938     }
1939     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1940     ierr = PetscFree(nnz);CHKERRQ(ierr);
1941     /* Set initial identity in the matrix */
1942     for (i=0;i<pcis->n_B;i++) {
1943       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1944     }
1945 
1946     if (pcbddc->dbg_flag) {
1947       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1948       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1949     }
1950 
1951 
1952     /* Now we loop on the constraints which need a change of basis */
1953     /*
1954        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1955        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1956 
1957        Basic blocks of change of basis matrix T computed
1958 
1959           - Using the following block transformation if there is only a primal dof on the cc
1960             (in the example, primal dof is the last one of the edge in LOCAL ordering
1961              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1962             | 1        0   ...        0     1 |
1963             | 0        1   ...        0     1 |
1964             |              ...                |
1965             | 0        ...            1     1 |
1966             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1967 
1968           - via QR decomposition of constraints otherwise
1969     */
1970     if (qr_needed) {
1971       /* space to store Q */
1972       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
1973       /* first we issue queries for optimal work */
1974       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1975       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1976       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1977       lqr_work = -1;
1978       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1979       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1980       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1981       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
1982       lgqr_work = -1;
1983       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1984       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1985       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1986       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1987       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1988       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1989       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1990       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1991       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
1992       /* array to store scaling factors for reflectors */
1993       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
1994       /* array to store rhs and solution of triangular solver */
1995       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
1996       /* allocating workspace for check */
1997       if (pcbddc->dbg_flag) {
1998         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
1999       }
2000     }
2001     /* array to store whether a node is primal or not */
2002     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2003     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2004     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2005     if (i != total_primal_vertices) {
2006       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2007     }
2008     for (i=0;i<total_primal_vertices;i++) {
2009       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2010     }
2011     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2012 
2013     /* loop on constraints and see whether or not they need a change of basis and compute it */
2014     /* -> using implicit ordering contained in temp_indices data */
2015     total_counts = pcbddc->n_vertices;
2016     primal_counter = total_counts;
2017     while (total_counts<pcbddc->local_primal_size) {
2018       primal_dofs = 1;
2019       if (PetscBTLookup(change_basis,total_counts)) {
2020         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2021         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2022           primal_dofs++;
2023         }
2024         /* get constraint info */
2025         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2026         dual_dofs = size_of_constraint-primal_dofs;
2027 
2028         if (pcbddc->dbg_flag) {
2029           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2030         }
2031 
2032         if (primal_dofs > 1) { /* QR */
2033 
2034           /* copy quadrature constraints for change of basis check */
2035           if (pcbddc->dbg_flag) {
2036             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2037           }
2038           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2039           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2040 
2041           /* compute QR decomposition of constraints */
2042           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2043           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2044           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2045           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2046           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2047           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2048           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2049 
2050           /* explictly compute R^-T */
2051           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2052           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2053           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2054           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2055           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2056           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2057           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2058           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2059           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2060           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2061 
2062           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2063           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2064           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2065           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2066           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2067           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2068           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2069           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2070           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2071 
2072           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2073              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2074              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2075           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2076           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2077           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2078           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2079           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2080           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2081           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2082           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2083           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2084           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2085 
2086           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2087           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2088           /* insert cols for primal dofs */
2089           for (j=0;j<primal_dofs;j++) {
2090             start_vals = &qr_basis[j*size_of_constraint];
2091             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2092             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2093           }
2094           /* insert cols for dual dofs */
2095           for (j=0,k=0;j<dual_dofs;k++) {
2096             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2097               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2098               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2099               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2100               j++;
2101             }
2102           }
2103 
2104           /* check change of basis */
2105           if (pcbddc->dbg_flag) {
2106             PetscInt   ii,jj;
2107             PetscBool valid_qr=PETSC_TRUE;
2108             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2109             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2110             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2111             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2112             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2113             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2114             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2115             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2116             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2117             for (jj=0;jj<size_of_constraint;jj++) {
2118               for (ii=0;ii<primal_dofs;ii++) {
2119                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2120                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2121               }
2122             }
2123             if (!valid_qr) {
2124               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2125               for (jj=0;jj<size_of_constraint;jj++) {
2126                 for (ii=0;ii<primal_dofs;ii++) {
2127                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2128                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2129                   }
2130                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2131                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2132                   }
2133                 }
2134               }
2135             } else {
2136               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2137             }
2138           }
2139         } else { /* simple transformation block */
2140           PetscInt row,col;
2141           PetscScalar val;
2142           for (j=0;j<size_of_constraint;j++) {
2143             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2144             if (!PetscBTLookup(is_primal,row)) {
2145               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2146               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2147               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2148             } else {
2149               for (k=0;k<size_of_constraint;k++) {
2150                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2151                 if (row != col) {
2152                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2153                 } else {
2154                   val = 1.0;
2155                 }
2156                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2157               }
2158             }
2159           }
2160           if (pcbddc->dbg_flag) {
2161             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2162           }
2163         }
2164         /* increment primal counter */
2165         primal_counter += primal_dofs;
2166       } else {
2167         if (pcbddc->dbg_flag) {
2168           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2169         }
2170       }
2171       /* increment constraint counter total_counts */
2172       total_counts += primal_dofs;
2173     }
2174 
2175     /* free workspace */
2176     if (qr_needed) {
2177       if (pcbddc->dbg_flag) {
2178         ierr = PetscFree(work);CHKERRQ(ierr);
2179       }
2180       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2181       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2182       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2183       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2184       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2185     }
2186     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2187     /* assembling */
2188     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2189     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2190     /*
2191     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2192     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2193     */
2194   }
2195   /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */
2196   if (pcbddc->user_ChangeOfBasisMatrix) {
2197     PetscInt rows,cols;
2198     ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr);
2199     if (rows == pcis->n && cols == pcis->n) {
2200       ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2201     } else {
2202       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2203       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2204     }
2205   }
2206 
2207   /* get indices in local ordering for vertices and constraints */
2208   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2209     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2210     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2211   }
2212   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2213   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2214   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2215   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2216   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2217   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2218   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2219   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2220   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2221   /* set quantities in PCBDDC data struct */
2222   pcbddc->n_actual_vertices = i;
2223   /* check if a new primal space has been introduced */
2224   pcbddc->new_primal_space_local = PETSC_TRUE;
2225   if (olocal_primal_size == pcbddc->local_primal_size) {
2226     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2227     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2228     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2229   }
2230   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2231   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2232 
2233   /* flush dbg viewer */
2234   if (pcbddc->dbg_flag) {
2235     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2236   }
2237 
2238   /* free workspace */
2239   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2240   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2241   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2242   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2243   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2244   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2245   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2246   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2252 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2253 {
2254   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2255   PC_IS       *pcis = (PC_IS*)pc->data;
2256   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2257   PetscInt    ierr,i,vertex_size;
2258   PetscViewer viewer=pcbddc->dbg_viewer;
2259 
2260   PetscFunctionBegin;
2261   /* Reset previously computed graph */
2262   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2263   /* Init local Graph struct */
2264   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2265 
2266   /* Check validity of the csr graph passed in by the user */
2267   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2268     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2269   }
2270 
2271   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2272   if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2273     Mat mat_adj;
2274     const PetscInt *xadj,*adjncy;
2275     PetscBool flg_row=PETSC_TRUE;
2276 
2277     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2278     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2279     if (!flg_row) {
2280       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2281     }
2282     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2283     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2284     if (!flg_row) {
2285       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2286     }
2287     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2288     pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2289   }
2290 
2291   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2292   vertex_size = 1;
2293   if (pcbddc->user_provided_isfordofs) {
2294     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2295       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2296       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2297         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2298         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2299       }
2300       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2301       pcbddc->n_ISForDofs = 0;
2302       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2303     }
2304     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2305     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2306   } else {
2307     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2308       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2309       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2310       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2311         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2312       }
2313     }
2314   }
2315 
2316   /* Setup of Graph */
2317   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2318     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2319   }
2320   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2321     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2322   }
2323   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2324 
2325   /* Graph's connected components analysis */
2326   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2327 
2328   /* print some info to stdout */
2329   if (pcbddc->dbg_flag) {
2330     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2331   }
2332 
2333   /* mark topography has done */
2334   pcbddc->recompute_topography = PETSC_FALSE;
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2340 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2341 {
2342   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2343   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2344   PetscErrorCode ierr;
2345 
2346   PetscFunctionBegin;
2347   n = 0;
2348   vertices = 0;
2349   if (pcbddc->ConstraintMatrix) {
2350     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2351     for (i=0;i<local_primal_size;i++) {
2352       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2353       if (size_of_constraint == 1) n++;
2354       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2355     }
2356     if (vertices_idx) {
2357       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2358       n = 0;
2359       for (i=0;i<local_primal_size;i++) {
2360         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2361         if (size_of_constraint == 1) {
2362           vertices[n++]=row_cmat_indices[0];
2363         }
2364         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2365       }
2366     }
2367   }
2368   *n_vertices = n;
2369   if (vertices_idx) *vertices_idx = vertices;
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2375 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2376 {
2377   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2378   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2379   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2380   PetscBT        touched;
2381   PetscErrorCode ierr;
2382 
2383     /* This function assumes that the number of local constraints per connected component
2384        is not greater than the number of nodes defined for the connected component
2385        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2386   PetscFunctionBegin;
2387   n = 0;
2388   constraints_index = 0;
2389   if (pcbddc->ConstraintMatrix) {
2390     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2391     max_size_of_constraint = 0;
2392     for (i=0;i<local_primal_size;i++) {
2393       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2394       if (size_of_constraint > 1) {
2395         n++;
2396       }
2397       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2398       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2399     }
2400     if (constraints_idx) {
2401       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2402       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2403       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2404       n = 0;
2405       for (i=0;i<local_primal_size;i++) {
2406         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2407         if (size_of_constraint > 1) {
2408           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2409           /* find first untouched local node */
2410           j = 0;
2411           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2412           min_index = row_cmat_global_indices[j];
2413           min_loc = j;
2414           /* search the minimum among nodes not yet touched on the connected component
2415              since there can be more than one constraint on a single cc */
2416           for (j=1;j<size_of_constraint;j++) {
2417             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2418               min_index = row_cmat_global_indices[j];
2419               min_loc = j;
2420             }
2421           }
2422           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2423           constraints_index[n++] = row_cmat_indices[min_loc];
2424         }
2425         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2426       }
2427       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2428       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2429     }
2430   }
2431   *n_constraints = n;
2432   if (constraints_idx) *constraints_idx = constraints_index;
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 /* the next two functions has been adapted from pcis.c */
2437 #undef __FUNCT__
2438 #define __FUNCT__ "PCBDDCApplySchur"
2439 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2440 {
2441   PetscErrorCode ierr;
2442   PC_IS          *pcis = (PC_IS*)(pc->data);
2443 
2444   PetscFunctionBegin;
2445   if (!vec2_B) { vec2_B = v; }
2446   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2447   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2448   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2449   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2450   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2456 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2457 {
2458   PetscErrorCode ierr;
2459   PC_IS          *pcis = (PC_IS*)(pc->data);
2460 
2461   PetscFunctionBegin;
2462   if (!vec2_B) { vec2_B = v; }
2463   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2464   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2465   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2466   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2467   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "PCBDDCSubsetNumbering"
2473 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2474 {
2475   Vec            local_vec,global_vec;
2476   IS             seqis,paris;
2477   VecScatter     scatter_ctx;
2478   PetscScalar    *array;
2479   PetscInt       *temp_global_dofs;
2480   PetscScalar    globalsum;
2481   PetscInt       i,j,s;
2482   PetscInt       nlocals,first_index,old_index,max_local;
2483   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2484   PetscMPIInt    *dof_sizes,*dof_displs;
2485   PetscBool      first_found;
2486   PetscErrorCode ierr;
2487 
2488   PetscFunctionBegin;
2489   /* mpi buffers */
2490   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2491   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2492   j = ( !rank_prec_comm ? size_prec_comm : 0);
2493   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2494   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2495   /* get maximum size of subset */
2496   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2497   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2498   max_local = 0;
2499   for (i=0;i<n_local_dofs;i++) {
2500     if (max_local < temp_global_dofs[i] ) {
2501       max_local = temp_global_dofs[i];
2502     }
2503   }
2504   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2505   max_global++;
2506   max_local = 0;
2507   for (i=0;i<n_local_dofs;i++) {
2508     if (max_local < local_dofs[i] ) {
2509       max_local = local_dofs[i];
2510     }
2511   }
2512   max_local++;
2513   /* allocate workspace */
2514   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2515   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2516   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2517   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2518   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2519   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2520   /* create scatter */
2521   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2522   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2523   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2524   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2525   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2526   /* init array */
2527   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2528   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2529   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2530   if (local_dofs_mult) {
2531     for (i=0;i<n_local_dofs;i++) {
2532       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2533     }
2534   } else {
2535     for (i=0;i<n_local_dofs;i++) {
2536       array[local_dofs[i]]=1.0;
2537     }
2538   }
2539   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2540   /* scatter into global vec and get total number of global dofs */
2541   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2542   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2543   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2544   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2545   /* Fill global_vec with cumulative function for global numbering */
2546   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2547   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2548   nlocals = 0;
2549   first_index = -1;
2550   first_found = PETSC_FALSE;
2551   for (i=0;i<s;i++) {
2552     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2553       first_found = PETSC_TRUE;
2554       first_index = i;
2555     }
2556     nlocals += (PetscInt)PetscRealPart(array[i]);
2557   }
2558   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2559   if (!rank_prec_comm) {
2560     dof_displs[0]=0;
2561     for (i=1;i<size_prec_comm;i++) {
2562       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2563     }
2564   }
2565   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2566   if (first_found) {
2567     array[first_index] += (PetscScalar)nlocals;
2568     old_index = first_index;
2569     for (i=first_index+1;i<s;i++) {
2570       if (PetscRealPart(array[i]) > 0.1) {
2571         array[i] += array[old_index];
2572         old_index = i;
2573       }
2574     }
2575   }
2576   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2577   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2578   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2579   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2580   /* get global ordering of local dofs */
2581   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2582   if (local_dofs_mult) {
2583     for (i=0;i<n_local_dofs;i++) {
2584       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2585     }
2586   } else {
2587     for (i=0;i<n_local_dofs;i++) {
2588       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2589     }
2590   }
2591   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2592   /* free workspace */
2593   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2594   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2595   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2596   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2597   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2598   /* return pointer to global ordering of local dofs */
2599   *global_numbering_subset = temp_global_dofs;
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2605 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2606 {
2607   PetscInt       i,j;
2608   PetscScalar    *alphas;
2609   PetscErrorCode ierr;
2610 
2611   PetscFunctionBegin;
2612   /* this implements stabilized Gram-Schmidt */
2613   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2614   for (i=0;i<n;i++) {
2615     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2616     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2617     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2618   }
2619   ierr = PetscFree(alphas);CHKERRQ(ierr);
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 #undef __FUNCT__
2624 #define __FUNCT__ "MatISGetSubassemblingPattern"
2625 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2626 {
2627   Mat             subdomain_adj;
2628   IS              new_ranks,ranks_send_to;
2629   MatPartitioning partitioner;
2630   Mat_IS          *matis;
2631   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2632   PetscInt        prank;
2633   PetscMPIInt     size,rank,color;
2634   PetscInt        *xadj,*adjncy,*oldranks;
2635   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2636   PetscInt        i,j,local_size,threshold=0;
2637   PetscErrorCode  ierr;
2638   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2639   PetscSubcomm    subcomm;
2640 
2641   PetscFunctionBegin;
2642   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2643   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2644   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2645 
2646   /* Get info on mapping */
2647   matis = (Mat_IS*)(mat->data);
2648   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2649   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2650 
2651   /* build local CSR graph of subdomains' connectivity */
2652   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2653   xadj[0] = 0;
2654   xadj[1] = PetscMax(n_neighs-1,0);
2655   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2656   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2657 
2658   if (threshold) {
2659     PetscInt* count,min_threshold;
2660     ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr);
2661     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2662     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2663       for (j=0;j<n_shared[i];j++) {
2664         count[shared[i][j]] += 1;
2665       }
2666     }
2667     /* adapt threshold since we dont want to lose significant connections */
2668     min_threshold = n_neighs;
2669     for (i=1;i<n_neighs;i++) {
2670       for (j=0;j<n_shared[i];j++) {
2671         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2672       }
2673     }
2674     threshold = PetscMax(min_threshold+1,threshold);
2675     xadj[1] = 0;
2676     for (i=1;i<n_neighs;i++) {
2677       for (j=0;j<n_shared[i];j++) {
2678         if (count[shared[i][j]] < threshold) {
2679           adjncy[xadj[1]] = neighs[i];
2680           adjncy_wgt[xadj[1]] = n_shared[i];
2681           xadj[1]++;
2682           break;
2683         }
2684       }
2685     }
2686     ierr = PetscFree(count);CHKERRQ(ierr);
2687   } else {
2688     if (xadj[1]) {
2689       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2690       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2691     }
2692   }
2693   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2694   if (use_square) {
2695     for (i=0;i<xadj[1];i++) {
2696       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2697     }
2698   }
2699   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2700 
2701   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2702 
2703   /*
2704     Restrict work on active processes only.
2705   */
2706   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2707   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2708   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2709   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2710   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2711   if (color) {
2712     ierr = PetscFree(xadj);CHKERRQ(ierr);
2713     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2714     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2715   } else {
2716     PetscInt coarsening_ratio;
2717     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2718     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2719     prank = rank;
2720     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2721     /*
2722     for (i=0;i<size;i++) {
2723       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2724     }
2725     */
2726     for (i=0;i<xadj[1];i++) {
2727       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2728     }
2729     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2730     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2731     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2732 
2733     /* Partition */
2734     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2735     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2736     if (use_vwgt) {
2737       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2738       v_wgt[0] = local_size;
2739       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2740     }
2741     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2742     coarsening_ratio = size/n_subdomains;
2743     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2744     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2745     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2746     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2747     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2748 
2749     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2750     if (contiguous) {
2751       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2752     } else {
2753       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2754     }
2755     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2756     /* clean up */
2757     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2758     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2759     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2760     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2761   }
2762   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2763 
2764   /* assemble parallel IS for sends */
2765   i = 1;
2766   if (color) i=0;
2767   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2768 
2769   /* get back IS */
2770   *is_sends = ranks_send_to;
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2775 
2776 #undef __FUNCT__
2777 #define __FUNCT__ "MatISSubassemble"
2778 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2779 {
2780   Mat                    local_mat;
2781   Mat_IS                 *matis;
2782   IS                     is_sends_internal;
2783   PetscInt               rows,cols;
2784   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2785   PetscBool              ismatis,isdense,destroy_mat;
2786   ISLocalToGlobalMapping l2gmap;
2787   PetscInt*              l2gmap_indices;
2788   const PetscInt*        is_indices;
2789   MatType                new_local_type;
2790   /* buffers */
2791   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2792   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2793   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2794   /* MPI */
2795   MPI_Comm               comm,comm_n;
2796   PetscSubcomm           subcomm;
2797   PetscMPIInt            n_sends,n_recvs,commsize;
2798   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2799   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2800   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2801   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2802   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2803   PetscErrorCode         ierr;
2804 
2805   PetscFunctionBegin;
2806   /* TODO: add missing checks */
2807   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2808   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2809   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2810   PetscValidLogicalCollectiveInt(mat,nis,7);
2811   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2812   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2813   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2814   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2815   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2816   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2817   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2818   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2819     PetscInt mrows,mcols,mnrows,mncols;
2820     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2821     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2822     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2823     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2824     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2825     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2826   }
2827   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2828   PetscValidLogicalCollectiveInt(mat,bs,0);
2829   /* prepare IS for sending if not provided */
2830   if (!is_sends) {
2831     PetscBool pcontig = PETSC_TRUE;
2832     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
2833     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
2834   } else {
2835     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2836     is_sends_internal = is_sends;
2837   }
2838 
2839   /* get pointer of MATIS data */
2840   matis = (Mat_IS*)mat->data;
2841 
2842   /* get comm */
2843   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2844 
2845   /* compute number of sends */
2846   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2847   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2848 
2849   /* compute number of receives */
2850   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2851   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
2852   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2853   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2854   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2855   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2856   ierr = PetscFree(iflags);CHKERRQ(ierr);
2857 
2858   /* restrict comm if requested */
2859   subcomm = 0;
2860   destroy_mat = PETSC_FALSE;
2861   if (restrict_comm) {
2862     PetscMPIInt color,rank,subcommsize;
2863     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2864     color = 0;
2865     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
2866     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2867     subcommsize = commsize - subcommsize;
2868     /* check if reuse has been requested */
2869     if (reuse == MAT_REUSE_MATRIX) {
2870       if (*mat_n) {
2871         PetscMPIInt subcommsize2;
2872         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
2873         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
2874         comm_n = PetscObjectComm((PetscObject)*mat_n);
2875       } else {
2876         comm_n = PETSC_COMM_SELF;
2877       }
2878     } else { /* MAT_INITIAL_MATRIX */
2879       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
2880       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
2881       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2882       comm_n = subcomm->comm;
2883     }
2884     /* flag to destroy *mat_n if not significative */
2885     if (color) destroy_mat = PETSC_TRUE;
2886   } else {
2887     comm_n = comm;
2888   }
2889 
2890   /* prepare send/receive buffers */
2891   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
2892   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2893   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
2894   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2895   if (nis) {
2896     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
2897     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
2898   }
2899 
2900   /* Get data from local matrices */
2901   if (!isdense) {
2902     /* TODO: See below some guidelines on how to prepare the local buffers */
2903     /*
2904        send_buffer_vals should contain the raw values of the local matrix
2905        send_buffer_idxs should contain:
2906        - MatType_PRIVATE type
2907        - PetscInt        size_of_l2gmap
2908        - PetscInt        global_row_indices[size_of_l2gmap]
2909        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2910     */
2911   } else {
2912     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2913     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2914     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
2915     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2916     send_buffer_idxs[1] = i;
2917     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2918     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2919     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2920     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2921     for (i=0;i<n_sends;i++) {
2922       ilengths_vals[is_indices[i]] = len*len;
2923       ilengths_idxs[is_indices[i]] = len+2;
2924     }
2925   }
2926   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2927   /* additional is (if any) */
2928   if (nis) {
2929     PetscMPIInt psum;
2930     PetscInt j;
2931     for (j=0,psum=0;j<nis;j++) {
2932       PetscInt plen;
2933       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
2934       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
2935       psum += len+1; /* indices + lenght */
2936     }
2937     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
2938     for (j=0,psum=0;j<nis;j++) {
2939       PetscInt plen;
2940       const PetscInt *is_array_idxs;
2941       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
2942       send_buffer_idxs_is[psum] = plen;
2943       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
2944       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
2945       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
2946       psum += plen+1; /* indices + lenght */
2947     }
2948     for (i=0;i<n_sends;i++) {
2949       ilengths_idxs_is[is_indices[i]] = psum;
2950     }
2951     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
2952   }
2953 
2954   buf_size_idxs = 0;
2955   buf_size_vals = 0;
2956   buf_size_idxs_is = 0;
2957   for (i=0;i<n_recvs;i++) {
2958     buf_size_idxs += (PetscInt)olengths_idxs[i];
2959     buf_size_vals += (PetscInt)olengths_vals[i];
2960     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
2961   }
2962   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
2963   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
2964   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
2965 
2966   /* get new tags for clean communications */
2967   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2968   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2969   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
2970 
2971   /* allocate for requests */
2972   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
2973   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
2974   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
2975   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
2976   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
2977   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
2978 
2979   /* communications */
2980   ptr_idxs = recv_buffer_idxs;
2981   ptr_vals = recv_buffer_vals;
2982   ptr_idxs_is = recv_buffer_idxs_is;
2983   for (i=0;i<n_recvs;i++) {
2984     source_dest = onodes[i];
2985     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2986     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2987     ptr_idxs += olengths_idxs[i];
2988     ptr_vals += olengths_vals[i];
2989     if (nis) {
2990       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
2991       ptr_idxs_is += olengths_idxs_is[i];
2992     }
2993   }
2994   for (i=0;i<n_sends;i++) {
2995     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2996     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2997     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2998     if (nis) {
2999       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3000     }
3001   }
3002   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3003   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3004 
3005   /* assemble new l2g map */
3006   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3007   ptr_idxs = recv_buffer_idxs;
3008   buf_size_idxs = 0;
3009   for (i=0;i<n_recvs;i++) {
3010     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3011     ptr_idxs += olengths_idxs[i];
3012   }
3013   ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr);
3014   ptr_idxs = recv_buffer_idxs;
3015   buf_size_idxs = 0;
3016   for (i=0;i<n_recvs;i++) {
3017     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3018     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3019     ptr_idxs += olengths_idxs[i];
3020   }
3021   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3022   ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3023   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3024 
3025   /* infer new local matrix type from received local matrices type */
3026   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3027   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3028   if (n_recvs) {
3029     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3030     ptr_idxs = recv_buffer_idxs;
3031     for (i=0;i<n_recvs;i++) {
3032       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3033         new_local_type_private = MATAIJ_PRIVATE;
3034         break;
3035       }
3036       ptr_idxs += olengths_idxs[i];
3037     }
3038     switch (new_local_type_private) {
3039       case MATDENSE_PRIVATE:
3040         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3041           new_local_type = MATSEQAIJ;
3042           bs = 1;
3043         } else { /* if I receive only 1 dense matrix */
3044           new_local_type = MATSEQDENSE;
3045           bs = 1;
3046         }
3047         break;
3048       case MATAIJ_PRIVATE:
3049         new_local_type = MATSEQAIJ;
3050         bs = 1;
3051         break;
3052       case MATBAIJ_PRIVATE:
3053         new_local_type = MATSEQBAIJ;
3054         break;
3055       case MATSBAIJ_PRIVATE:
3056         new_local_type = MATSEQSBAIJ;
3057         break;
3058       default:
3059         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3060         break;
3061     }
3062   } else { /* by default, new_local_type is seqdense */
3063     new_local_type = MATSEQDENSE;
3064     bs = 1;
3065   }
3066 
3067   /* create MATIS object if needed */
3068   if (reuse == MAT_INITIAL_MATRIX) {
3069     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3070     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3071   } else {
3072     /* it also destroys the local matrices */
3073     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3074   }
3075   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3076   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3077   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3078   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3079 
3080   /* set values */
3081   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3082   ptr_vals = recv_buffer_vals;
3083   ptr_idxs = recv_buffer_idxs;
3084   for (i=0;i<n_recvs;i++) {
3085     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3086       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3087       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3088       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3089       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3090       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3091     } else {
3092       /* TODO */
3093     }
3094     ptr_idxs += olengths_idxs[i];
3095     ptr_vals += olengths_vals[i];
3096   }
3097   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3098   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3099   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3100   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3101 
3102 #if 0
3103   if (!restrict_comm) { /* check */
3104     Vec       lvec,rvec;
3105     PetscReal infty_error;
3106 
3107     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3108     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3109     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3110     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3111     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3112     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3113     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3114     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3115     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3116   }
3117 #endif
3118 
3119   /* assemble new additional is (if any) */
3120   if (nis) {
3121     PetscInt **temp_idxs,*count_is,j,psum;
3122 
3123     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3124     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3125     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3126     ptr_idxs = recv_buffer_idxs_is;
3127     psum = 0;
3128     for (i=0;i<n_recvs;i++) {
3129       for (j=0;j<nis;j++) {
3130         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3131         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3132         psum += plen;
3133         ptr_idxs += plen+1; /* shift pointer to received data */
3134       }
3135     }
3136     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3137     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3138     for (i=1;i<nis;i++) {
3139       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3140     }
3141     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3142     ptr_idxs = recv_buffer_idxs_is;
3143     for (i=0;i<n_recvs;i++) {
3144       for (j=0;j<nis;j++) {
3145         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3146         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3147         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3148         ptr_idxs += plen+1; /* shift pointer to received data */
3149       }
3150     }
3151     for (i=0;i<nis;i++) {
3152       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3153       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3154       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3155     }
3156     ierr = PetscFree(count_is);CHKERRQ(ierr);
3157     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3158     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3159   }
3160   /* free workspace */
3161   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3162   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3163   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3164   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3165   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3166   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3167   if (isdense) {
3168     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3169     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3170   } else {
3171     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3172   }
3173   if (nis) {
3174     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3175     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3176   }
3177   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3178   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3179   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3180   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3181   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3182   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3183   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3184   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3185   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3186   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3187   ierr = PetscFree(onodes);CHKERRQ(ierr);
3188   if (nis) {
3189     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3190     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3191     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3192   }
3193   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3194   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3195     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3196     for (i=0;i<nis;i++) {
3197       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3198     }
3199   }
3200   PetscFunctionReturn(0);
3201 }
3202 
3203 /* temporary hack into ksp private data structure */
3204 #include <petsc-private/kspimpl.h>
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3208 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3209 {
3210   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3211   PC_IS                  *pcis = (PC_IS*)pc->data;
3212   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3213   MatNullSpace           CoarseNullSpace=NULL;
3214   ISLocalToGlobalMapping coarse_islg;
3215   IS                     coarse_is,*isarray;
3216   PetscInt               i,im_active=-1,active_procs=-1;
3217   PetscInt               nis,nisdofs,nisneu;
3218   PC                     pc_temp;
3219   PCType                 coarse_pc_type;
3220   KSPType                coarse_ksp_type;
3221   PetscBool              multilevel_requested,multilevel_allowed;
3222   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3223   Mat                    t_coarse_mat_is;
3224   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3225   PetscMPIInt            all_procs;
3226   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3227   PetscBool              compute_vecs = PETSC_FALSE;
3228   PetscErrorCode         ierr;
3229 
3230   PetscFunctionBegin;
3231   /* Assign global numbering to coarse dofs */
3232   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3233     compute_vecs = PETSC_TRUE;
3234     PetscInt ocoarse_size;
3235     ocoarse_size = pcbddc->coarse_size;
3236     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3237     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3238     /* see if we can avoid some work */
3239     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3240       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3241         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3242         coarse_reuse = PETSC_FALSE;
3243       } else { /* we can safely reuse already computed coarse matrix */
3244         coarse_reuse = PETSC_TRUE;
3245       }
3246     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3247       coarse_reuse = PETSC_FALSE;
3248     }
3249     /* reset any subassembling information */
3250     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3251     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3252   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3253     coarse_reuse = PETSC_TRUE;
3254   }
3255 
3256   /* count "active" (i.e. with positive local size) and "void" processes */
3257   im_active = !!(pcis->n);
3258   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3259   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3260   void_procs = all_procs-active_procs;
3261   csin_type_simple = PETSC_TRUE;
3262   if (pcbddc->current_level) {
3263     csin_ml = PETSC_TRUE;
3264     ncoarse_ml = void_procs;
3265     csin_ds = PETSC_TRUE;
3266     ncoarse_ds = void_procs;
3267     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3268   } else {
3269     csin_ml = PETSC_FALSE;
3270     ncoarse_ml = all_procs;
3271     if (void_procs) {
3272       csin_ds = PETSC_TRUE;
3273       ncoarse_ds = void_procs;
3274       csin_type_simple = PETSC_FALSE;
3275     } else {
3276       csin_ds = PETSC_FALSE;
3277       ncoarse_ds = all_procs;
3278     }
3279   }
3280 
3281   /*
3282     test if we can go multilevel: three conditions must be satisfied:
3283     - we have not exceeded the number of levels requested
3284     - we can actually subassemble the active processes
3285     - we can find a suitable number of MPI processes where we can place the subassembled problem
3286   */
3287   multilevel_allowed = PETSC_FALSE;
3288   multilevel_requested = PETSC_FALSE;
3289   if (pcbddc->current_level < pcbddc->max_levels) {
3290     multilevel_requested = PETSC_TRUE;
3291     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3292       multilevel_allowed = PETSC_FALSE;
3293     } else {
3294       multilevel_allowed = PETSC_TRUE;
3295     }
3296   }
3297   /* determine number of process partecipating to coarse solver */
3298   if (multilevel_allowed) {
3299     ncoarse = ncoarse_ml;
3300     csin = csin_ml;
3301   } else {
3302     ncoarse = ncoarse_ds;
3303     csin = csin_ds;
3304   }
3305 
3306   /* creates temporary l2gmap and IS for coarse indexes */
3307   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3308   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3309 
3310   /* creates temporary MATIS object for coarse matrix */
3311   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3312 #if 0
3313   {
3314     PetscViewer viewer;
3315     char filename[256];
3316     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3317     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3318     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3319     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3320     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3321   }
3322 #endif
3323   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3324   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3325   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3328 
3329   /* compute dofs splitting and neumann boundaries for coarse dofs */
3330   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3331     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3332     const PetscInt         *idxs;
3333     ISLocalToGlobalMapping tmap;
3334 
3335     /* create map between primal indices (in local representative ordering) and local primal numbering */
3336     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3337     /* allocate space for temporary storage */
3338     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3339     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3340     /* allocate for IS array */
3341     nisdofs = pcbddc->n_ISForDofsLocal;
3342     nisneu = !!pcbddc->NeumannBoundariesLocal;
3343     nis = nisdofs + nisneu;
3344     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3345     /* dofs splitting */
3346     for (i=0;i<nisdofs;i++) {
3347       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3348       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3349       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3350       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3351       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3352       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3353       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3354       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3355     }
3356     /* neumann boundaries */
3357     if (pcbddc->NeumannBoundariesLocal) {
3358       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3359       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3360       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3361       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3362       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3363       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3364       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3365       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3366     }
3367     /* free memory */
3368     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3369     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3370     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3371   } else {
3372     nis = 0;
3373     nisdofs = 0;
3374     nisneu = 0;
3375     isarray = NULL;
3376   }
3377   /* destroy no longer needed map */
3378   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3379 
3380   /* restrict on coarse candidates (if needed) */
3381   coarse_mat_is = NULL;
3382   if (csin) {
3383     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3384       PetscInt j,tissize,*nisindices;
3385       PetscInt *coarse_candidates;
3386       const PetscInt* tisindices;
3387       /* get coarse candidates' ranks in pc communicator */
3388       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3389       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3390       for (i=0,j=0;i<all_procs;i++) {
3391         if (!coarse_candidates[i]) {
3392           coarse_candidates[j]=i;
3393           j++;
3394         }
3395       }
3396       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3397       /* get a suitable subassembling pattern */
3398       if (csin_type_simple) {
3399         PetscMPIInt rank;
3400         PetscInt    issize,isidx;
3401         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3402         if (im_active) {
3403           issize = 1;
3404           isidx = (PetscInt)rank;
3405         } else {
3406           issize = 0;
3407           isidx = -1;
3408         }
3409         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3410       } else {
3411         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3412       }
3413       if (pcbddc->dbg_flag) {
3414         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3415         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3416         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3417         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3418         for (i=0;i<j;i++) {
3419           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3420         }
3421         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3422         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3423       }
3424       /* shift the pattern on coarse candidates */
3425       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3426       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3427       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3428       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3429       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3430       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3431       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3432     }
3433     if (pcbddc->dbg_flag) {
3434       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3435       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3436       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3437       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3438     }
3439     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3440     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3441   } else {
3442     if (pcbddc->dbg_flag) {
3443       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3444       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3445       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3446     }
3447     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3448     coarse_mat_is = t_coarse_mat_is;
3449   }
3450 
3451   /* create local to global scatters for coarse problem */
3452   if (compute_vecs) {
3453     PetscInt lrows;
3454     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3455     if (coarse_mat_is) {
3456       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3457     } else {
3458       lrows = 0;
3459     }
3460     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3461     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3462     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3463     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3464     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3465   }
3466   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3467   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3468 
3469   /* set defaults for coarse KSP and PC */
3470   if (multilevel_allowed) {
3471     coarse_ksp_type = KSPRICHARDSON;
3472     coarse_pc_type = PCBDDC;
3473   } else {
3474     coarse_ksp_type = KSPPREONLY;
3475     coarse_pc_type = PCREDUNDANT;
3476   }
3477 
3478   /* print some info if requested */
3479   if (pcbddc->dbg_flag) {
3480     if (!multilevel_allowed) {
3481       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3482       if (multilevel_requested) {
3483         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3484       } else if (pcbddc->max_levels) {
3485         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3486       }
3487       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3488     }
3489   }
3490 
3491   /* create the coarse KSP object only once with defaults */
3492   if (coarse_mat_is) {
3493     MatReuse coarse_mat_reuse;
3494     PetscViewer dbg_viewer = NULL;
3495     if (pcbddc->dbg_flag) {
3496       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3497       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3498     }
3499     if (!pcbddc->coarse_ksp) {
3500       char prefix[256],str_level[16];
3501       size_t len;
3502       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3503       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3504       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3505       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3506       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3507       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3508       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3509       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3510       /* prefix */
3511       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3512       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3513       if (!pcbddc->current_level) {
3514         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3515         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3516       } else {
3517         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3518         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3519         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3520         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3521         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3522         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3523       }
3524       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3525       /* allow user customization */
3526       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3527       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3528     }
3529 
3530     /* get some info after set from options */
3531     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3532     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3533     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3534     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3535     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3536       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3537       isbddc = PETSC_FALSE;
3538     }
3539     if (isredundant) {
3540       KSP inner_ksp;
3541       PC inner_pc;
3542       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3543       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3544       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3545     }
3546 
3547     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3548     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3549     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3550     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3551     if (nisdofs) {
3552       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3553       for (i=0;i<nisdofs;i++) {
3554         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3555       }
3556     }
3557     if (nisneu) {
3558       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3559       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3560     }
3561 
3562     /* assemble coarse matrix */
3563     if (coarse_reuse) {
3564       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3565       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3566       coarse_mat_reuse = MAT_REUSE_MATRIX;
3567     } else {
3568       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3569     }
3570     if (isbddc || isnn) {
3571       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3572         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3573         if (pcbddc->dbg_flag) {
3574           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3575           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3576           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3577           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3578         }
3579       }
3580       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3581     } else {
3582       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3583     }
3584     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3585 
3586     /* propagate symmetry info to coarse matrix */
3587     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3588     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3589 
3590     /* set operators */
3591     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3592     if (pcbddc->dbg_flag) {
3593       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3594     }
3595   } else { /* processes non partecipating to coarse solver (if any) */
3596     coarse_mat = 0;
3597   }
3598   ierr = PetscFree(isarray);CHKERRQ(ierr);
3599 #if 0
3600   {
3601     PetscViewer viewer;
3602     char filename[256];
3603     sprintf(filename,"coarse_mat.m");
3604     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3605     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3606     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3607     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3608   }
3609 #endif
3610 
3611   /* Compute coarse null space (special handling by BDDC only) */
3612   if (pcbddc->NullSpace) {
3613     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3614   }
3615 
3616   if (pcbddc->coarse_ksp) {
3617     Vec crhs,csol;
3618     PetscBool ispreonly;
3619     if (CoarseNullSpace) {
3620       if (isbddc) {
3621         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3622       } else {
3623         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3624       }
3625     }
3626     /* setup coarse ksp */
3627     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3628     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3629     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3630     /* hack */
3631     if (!csol) {
3632       ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3633     }
3634     if (!crhs) {
3635       ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3636     }
3637     /* Check coarse problem if in debug mode or if solving with an iterative method */
3638     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3639     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3640       KSP       check_ksp;
3641       KSPType   check_ksp_type;
3642       PC        check_pc;
3643       Vec       check_vec,coarse_vec;
3644       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3645       PetscInt  its;
3646       PetscBool compute_eigs;
3647       PetscReal *eigs_r,*eigs_c;
3648       PetscInt  neigs;
3649       const char *prefix;
3650 
3651       /* Create ksp object suitable for estimation of extreme eigenvalues */
3652       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3653       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3654       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3655       if (ispreonly) {
3656         check_ksp_type = KSPPREONLY;
3657         compute_eigs = PETSC_FALSE;
3658       } else {
3659         check_ksp_type = KSPGMRES;
3660         compute_eigs = PETSC_TRUE;
3661       }
3662       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3663       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3664       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3665       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3666       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3667       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3668       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3669       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3670       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3671       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3672       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3673       /* create random vec */
3674       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3675       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3676       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3677       if (CoarseNullSpace) {
3678         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3679       }
3680       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3681       /* solve coarse problem */
3682       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3683       if (CoarseNullSpace) {
3684         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3685       }
3686       /* set eigenvalue estimation if preonly has not been requested */
3687       if (compute_eigs) {
3688         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3689         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3690         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3691         lambda_max = eigs_r[neigs-1];
3692         lambda_min = eigs_r[0];
3693         if (pcbddc->use_coarse_estimates) {
3694           if (lambda_max>lambda_min) {
3695             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3696             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3697           }
3698         }
3699       }
3700 
3701       /* check coarse problem residual error */
3702       if (pcbddc->dbg_flag) {
3703         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3704         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3705         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3706         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3707         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3708         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3709         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3710         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3711         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3712         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3713         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3714         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3715         if (compute_eigs) {
3716           PetscReal lambda_max_s,lambda_min_s;
3717           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3718           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3719           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3720           for (i=0;i<neigs;i++) {
3721             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3722           }
3723         }
3724         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3725         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3726       }
3727       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3728       if (compute_eigs) {
3729         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3730         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3731       }
3732     }
3733   }
3734   /* print additional info */
3735   if (pcbddc->dbg_flag) {
3736     /* waits until all processes reaches this point */
3737     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3738     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3739     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3740   }
3741 
3742   /* free memory */
3743   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3744   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 #undef __FUNCT__
3749 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3750 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3751 {
3752   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3753   PC_IS*         pcis = (PC_IS*)pc->data;
3754   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3755   PetscInt       i,coarse_size;
3756   PetscInt       *local_primal_indices;
3757   PetscErrorCode ierr;
3758 
3759   PetscFunctionBegin;
3760   /* Compute global number of coarse dofs */
3761   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3762     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3763   }
3764   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3765 
3766   /* check numbering */
3767   if (pcbddc->dbg_flag) {
3768     PetscScalar coarsesum,*array;
3769     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
3770 
3771     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3772     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3773     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3774     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3775     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3776     for (i=0;i<pcbddc->local_primal_size;i++) {
3777       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3778     }
3779     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3780     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3781     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3782     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3783     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3784     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3785     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3786     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3787     for (i=0;i<pcis->n;i++) {
3788       if (array[i] == 1.0) {
3789         set_error = PETSC_TRUE;
3790         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3791       }
3792     }
3793     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3794     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3795     for (i=0;i<pcis->n;i++) {
3796       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3797     }
3798     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3799     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3800     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3801     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3802     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3803     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3804     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
3805       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3806       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3807       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3808       for (i=0;i<pcbddc->local_primal_size;i++) {
3809         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3810       }
3811       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3812     }
3813     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3814     if (set_error_reduced) {
3815       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
3816     }
3817   }
3818   /* get back data */
3819   *coarse_size_n = coarse_size;
3820   *local_primal_indices_n = local_primal_indices;
3821   PetscFunctionReturn(0);
3822 }
3823 
3824 #undef __FUNCT__
3825 #define __FUNCT__ "PCBDDCGlobalToLocal"
3826 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
3827 {
3828   IS             localis_t;
3829   PetscInt       i,lsize,*idxs,n;
3830   PetscScalar    *vals;
3831   PetscErrorCode ierr;
3832 
3833   PetscFunctionBegin;
3834   /* get indices in local ordering exploiting local to global map */
3835   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3836   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3837   for (i=0;i<lsize;i++) vals[i] = 1.0;
3838   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3839   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
3840   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
3841   if (idxs) { /* multilevel guard */
3842     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3843   }
3844   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
3845   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3846   ierr = PetscFree(vals);CHKERRQ(ierr);
3847   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
3848   /* now compute set in local ordering */
3849   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3850   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3851   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
3852   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
3853   for (i=0,lsize=0;i<n;i++) {
3854     if (PetscRealPart(vals[i]) > 0.5) {
3855       lsize++;
3856     }
3857   }
3858   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3859   for (i=0,lsize=0;i<n;i++) {
3860     if (PetscRealPart(vals[i]) > 0.5) {
3861       idxs[lsize++] = i;
3862     }
3863   }
3864   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
3865   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3866   *localis = localis_t;
3867   PetscFunctionReturn(0);
3868 }
3869