xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision b1b3d7a2dc467bc01d0ce7eb62bfacc6b4178a73)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #if 0
9 #undef __FUNCT__
10 #define __FUNCT__ "PCBDDCAdaptiveSelection"
11 PetscErrorCode PCBDDCAdaptiveSelection(PC pc, MatNullSpace* adapt_nnsp)
12 {
13   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs[0],pcbddc->mat_graph,...);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 #endif
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "PCBDDCSetUpSolvers"
24 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
25 {
26   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
27   PetscScalar    *coarse_submat_vals;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   /* Setup local scatters R_to_B and (optionally) R_to_D */
32   /* PCBDDCSetUpLocalWorkVectors should be called first! */
33   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
34 
35   /* Setup local neumann solver ksp_R */
36   /* PCBDDCSetUpLocalScatters should be called first! */
37   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
38 
39   /* Change global null space passed in by the user if change of basis has been requested */
40   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
41     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
42   }
43 
44   /*
45      Setup local correction and local part of coarse basis.
46      Gives back the dense local part of the coarse matrix in column major ordering
47   */
48   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
49 
50   /* Compute total number of coarse nodes and setup coarse solver */
51   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
52 
53   /* free */
54   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "PCBDDCResetCustomization"
60 PetscErrorCode PCBDDCResetCustomization(PC pc)
61 {
62   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
67   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
68   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
70   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
71   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
72   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
73   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
74   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
75   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
76   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetTopography"
82 PetscErrorCode PCBDDCResetTopography(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
89   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
91   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
92   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[0]);CHKERRQ(ierr);
93   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[1]);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNCT__
98 #define __FUNCT__ "PCBDDCResetSolvers"
99 PetscErrorCode PCBDDCResetSolvers(PC pc)
100 {
101   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
102   PetscErrorCode ierr;
103 
104   PetscFunctionBegin;
105   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
106   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
110   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
113   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
114   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
115   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
116   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
117   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
118   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
119   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
120   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
121   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
122   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
123   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
124   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
125   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
126   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
127   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
128   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
134 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
135 {
136   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
137   PC_IS          *pcis = (PC_IS*)pc->data;
138   VecType        impVecType;
139   PetscInt       n_constraints,n_R,old_size;
140   PetscErrorCode ierr;
141 
142   PetscFunctionBegin;
143   if (!pcbddc->ConstraintMatrix) {
144     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
145   }
146   /* get sizes */
147   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
148   n_R = pcis->n-pcbddc->n_actual_vertices;
149   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
150   /* local work vectors (try to avoid unneeded work)*/
151   /* R nodes */
152   old_size = -1;
153   if (pcbddc->vec1_R) {
154     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
155   }
156   if (n_R != old_size) {
157     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
158     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
159     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
160     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
161     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
162     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
163   }
164   /* local primal dofs */
165   old_size = -1;
166   if (pcbddc->vec1_P) {
167     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
168   }
169   if (pcbddc->local_primal_size != old_size) {
170     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
171     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
172     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
173     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
174   }
175   /* local explicit constraints */
176   old_size = -1;
177   if (pcbddc->vec1_C) {
178     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
179   }
180   if (n_constraints && n_constraints != old_size) {
181     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
182     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
183     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
184     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "PCBDDCSetUpCorrection"
191 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
192 {
193   PetscErrorCode         ierr;
194   /* pointers to pcis and pcbddc */
195   PC_IS*                 pcis = (PC_IS*)pc->data;
196   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
197   /* submatrices of local problem */
198   Mat                    A_RV,A_VR,A_VV;
199   /* working matrices */
200   Mat                    M1,M2,M3,C_CR;
201   /* working vectors */
202   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
203   /* additional working stuff */
204   IS                     is_aux;
205   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
206   const PetscScalar      *array,*row_cmat_values;
207   const PetscInt         *row_cmat_indices,*idx_R_local;
208   PetscInt               *idx_V_B,*auxindices;
209   PetscInt               n_vertices,n_constraints,size_of_constraint;
210   PetscInt               i,j,n_R,n_D,n_B;
211   PetscBool              unsymmetric_check;
212   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
213   MatType                impMatType;
214   /* some shortcuts to scalars */
215   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
216   /* for debugging purposes */
217   PetscReal              *coarsefunctions_errors,*constraints_errors;
218 
219   PetscFunctionBegin;
220   /* get number of vertices (corners plus constraints with change of basis)
221      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
222   n_vertices = pcbddc->n_actual_vertices;
223   n_constraints = pcbddc->local_primal_size-n_vertices;
224   /* Set Non-overlapping dimensions */
225   n_B = pcis->n_B; n_D = pcis->n - n_B;
226   n_R = pcis->n-n_vertices;
227 
228   /* Set types for local objects needed by BDDC precondtioner */
229   impMatType = MATSEQDENSE;
230 
231   /* Allocating some extra storage just to be safe */
232   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
233   for (i=0;i<pcis->n;i++) auxindices[i]=i;
234 
235   /* vertices in boundary numbering */
236   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
237   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
238   if (i != n_vertices) {
239     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
240   }
241 
242   /* Precompute stuffs needed for preprocessing and application of BDDC*/
243   if (n_constraints) {
244     /* see if we can save some allocations */
245     if (pcbddc->local_auxmat2) {
246       PetscInt on_R,on_constraints;
247       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
248       if (on_R != n_R || on_constraints != n_constraints) {
249         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
250         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
251       }
252     }
253     /* work vectors */
254     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
255     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
256     /* auxiliary matrices */
257     if (!pcbddc->local_auxmat2) {
258       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
259       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
260       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
261       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
262     }
263 
264     /* Extract constraints on R nodes: C_{CR}  */
265     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
266     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
267     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
268 
269     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
270     for (i=0;i<n_constraints;i++) {
271       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
272       /* Get row of constraint matrix in R numbering */
273       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
274       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
275       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
276       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
277       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
278       /* Solve for row of constraint matrix in R numbering */
279       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
280       /* Set values in local_auxmat2 */
281       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
282       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
283       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
284     }
285     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
286     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
287     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
288 
289     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
290     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
291     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
292     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
293     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
294     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
295     ierr = MatSetUp(M1);CHKERRQ(ierr);
296     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
297     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
298     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
299     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
300     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
301     ierr = MatDestroy(&M2);CHKERRQ(ierr);
302     ierr = MatDestroy(&M3);CHKERRQ(ierr);
303     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
304     if (!pcbddc->local_auxmat1) {
305       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
306     } else {
307       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
308     }
309   }
310 
311   /* Get submatrices from subdomain matrix */
312   if (n_vertices) {
313     PetscInt ibs,mbs;
314     PetscBool issbaij;
315     Mat newmat;
316 
317     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
318     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
319     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
320     if (ibs != mbs) { /* need to convert to SEQAIJ */
321       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
322       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
323       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
325       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
326     } else {
327       /* this is safe */
328       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
329       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
330       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
331         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
332         /* which of the two approaches is faster? */
333         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
334         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
335         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
336         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
337         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
338       } else {
339         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
340         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
341       }
342     }
343     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
344     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
345     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
346   }
347 
348   /* Matrix of coarse basis functions (local) */
349   if (pcbddc->coarse_phi_B) {
350     PetscInt on_B,on_primal;
351     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
352     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
353       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
354       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
355     }
356   }
357   if (pcbddc->coarse_phi_D) {
358     PetscInt on_D,on_primal;
359     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
360     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
361       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
362       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
363     }
364   }
365   if (!pcbddc->coarse_phi_B) {
366     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
367     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
368     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
369     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
370   }
371   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
372     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
373     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
374     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
375     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
376   }
377 
378   if (pcbddc->dbg_flag) {
379     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
380     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
381     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
382   }
383   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
384   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
385 
386   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
387 
388   /* vertices */
389   for (i=0;i<n_vertices;i++) {
390     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
391     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
392     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
393     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
394     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
395     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
396     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
397     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
398     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
399     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
400     if (n_constraints) {
401       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
402       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
403       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
404     }
405     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
406     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
407 
408     /* Set values in coarse basis function and subdomain part of coarse_mat */
409     /* coarse basis functions */
410     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
411     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
412     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
413     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
415     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
416     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
417     if (pcbddc->switch_static || pcbddc->dbg_flag) {
418       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
419       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
420       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
421       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
423     }
424     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
425     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
427     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
428     if (n_constraints) {
429       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
430       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
431       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
432     }
433 
434     /* check */
435     if (pcbddc->dbg_flag) {
436       /* assemble subdomain vector on local nodes */
437       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
438       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
439       if (n_R) {
440         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
441       }
442       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
443       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
444       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
445       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
446       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
447       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
448       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
449       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
450       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
451       if (n_constraints) {
452         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
453         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
454         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
455       }
456       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
457       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
458       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
459       /* check saddle point solution */
460       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
461       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
462       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
463       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
464       /* shift by the identity matrix */
465       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
466       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
467       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
468       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
469     }
470   }
471 
472   /* constraints */
473   for (i=0;i<n_constraints;i++) {
474     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
475     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
476     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
477     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
478     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
479     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
480     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
482     if (n_vertices) {
483       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
484     }
485     /* Set values in coarse basis function and subdomain part of coarse_mat */
486     /* coarse basis functions */
487     j = i+n_vertices; /* don't touch this! */
488     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
489     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
490     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
492     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
493     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
494     if (pcbddc->switch_static || pcbddc->dbg_flag) {
495       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
498       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
499       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
500     }
501     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
502     if (n_vertices) {
503       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
504       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
505       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
506     }
507     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
508     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
509     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
510 
511     if (pcbddc->dbg_flag) {
512       /* assemble subdomain vector on nodes */
513       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
514       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
515       if (n_R) {
516         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
517       }
518       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
519       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
520       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
521       /* assemble subdomain vector of lagrange multipliers */
522       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
523       if (n_vertices) {
524         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
525         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
526         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
527       }
528       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
529       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
530       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
531       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
532       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
533       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
534       /* check saddle point solution */
535       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
536       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
537       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
538       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
539       /* shift by the identity matrix */
540       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
541       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
542       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
543       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
544     }
545   }
546   /* call assembling routines for local coarse basis */
547   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549   if (pcbddc->switch_static || pcbddc->dbg_flag) {
550     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552   }
553 
554   /* compute other basis functions for non-symmetric problems */
555   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
556   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
557     PetscBool setsym;
558     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
559     if (!setsym) pcbddc->issym = PETSC_FALSE;
560   }
561 
562   if (!pcbddc->issym) {
563     if (!pcbddc->coarse_psi_B) {
564       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
565       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
566       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
567       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
568     }
569     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
570       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
571       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
572       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
573       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
574     }
575     for (i=0;i<pcbddc->local_primal_size;i++) {
576       if (n_constraints) {
577         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
578         for (j=0;j<n_constraints;j++) {
579           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
580         }
581         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
582         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
583       }
584       if (i<n_vertices) {
585         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
586         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
587         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
588         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
589         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
590         if (n_constraints) {
591           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
592         }
593       } else {
594         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
595       }
596       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
597       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
598       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
599       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
600       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
601       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
602       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
603       if (i<n_vertices) {
604         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
605       }
606       if (pcbddc->switch_static || pcbddc->dbg_flag) {
607         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
608         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
609         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
610         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
611         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
612       }
613 
614       if (pcbddc->dbg_flag) {
615         /* assemble subdomain vector on nodes */
616         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
617         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
618         if (n_R) {
619           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
620         }
621         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
622         if (i<n_vertices) {
623           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
624         }
625         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
626         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
627         /* assemble subdomain vector of lagrange multipliers */
628         for (j=0;j<pcbddc->local_primal_size;j++) {
629           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
630         }
631         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
632         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
633         /* check saddle point solution */
634         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
635         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
636         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
637         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
638         /* shift by the identity matrix */
639         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
640         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
641         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
642         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
643       }
644     }
645     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
646     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
647     if (pcbddc->switch_static || pcbddc->dbg_flag) {
648       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
649       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
650     }
651     unsymmetric_check = PETSC_TRUE;
652   } else { /* take references to already computed coarse basis */
653     unsymmetric_check = PETSC_FALSE;
654     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
655     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
656     if (pcbddc->coarse_phi_D) {
657       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
658       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
659     }
660   }
661   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
662   /* Checking coarse_sub_mat and coarse basis functios */
663   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
664   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
665   if (pcbddc->dbg_flag) {
666     Mat         coarse_sub_mat;
667     Mat         AUXMAT,TM1,TM2,TM3,TM4;
668     Mat         coarse_phi_D,coarse_phi_B;
669     Mat         coarse_psi_D,coarse_psi_B;
670     Mat         A_II,A_BB,A_IB,A_BI;
671     MatType     checkmattype=MATSEQAIJ;
672     PetscReal   real_value;
673 
674     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
675     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
676     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
677     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
678     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
679     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
680     if (unsymmetric_check) {
681       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
682       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
683     }
684     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
685 
686     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
687     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
688     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
689     if (unsymmetric_check) {
690       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
691       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
692       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
693       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
694       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
695       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
696       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
697       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
698       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
699       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
700       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
701       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
702     } else {
703       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
704       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
705       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
706       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
707       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
708       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
709       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
710       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
711     }
712     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
713     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
714     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
715     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
716     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
717     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
718     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
719     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
720     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
721     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
722     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
723     for (i=0;i<pcbddc->local_primal_size;i++) {
724       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
725     }
726     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
727     for (i=0;i<pcbddc->local_primal_size;i++) {
728       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
729     }
730     if (unsymmetric_check) {
731       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
732       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
733         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
734       }
735       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
736       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
737         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
738       }
739     }
740     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
741     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
742     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
743     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
744     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
745     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
746     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
747     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
748     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
749     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
750     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
751     if (unsymmetric_check) {
752       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
753       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
754     }
755     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
756     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
757     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
758     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
759   }
760   /* free memory */
761   if (n_vertices) {
762     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
763     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
764     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
765     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
766     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
767   }
768   if (n_constraints) {
769     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
770     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
771     ierr = MatDestroy(&M1);CHKERRQ(ierr);
772     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
773   }
774   ierr = PetscFree(auxindices);CHKERRQ(ierr);
775   /* get back data */
776   *coarse_submat_vals_n = coarse_submat_vals;
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
782 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
783 {
784   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
785   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
786   Mat            *change_mat_all;
787   IS             is_local,is_global;
788   PetscBool      sorted,isseqaij;
789   PetscInt       local_size,*idxs_perm;
790   PetscErrorCode ierr;
791 
792   PetscFunctionBegin;
793   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
794   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
795   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
796   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
797   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
798   ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
799   if (!sorted) {
800     const PetscInt *idxs;
801     PetscInt *idxs_sorted,i;
802 
803     ierr = PetscMalloc1(local_size,&idxs_perm);CHKERRQ(ierr);
804     ierr = PetscMalloc1(local_size,&idxs_sorted);CHKERRQ(ierr);
805     for (i=0;i<local_size;i++) {
806       idxs_perm[i] = i;
807     }
808     ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
809     ierr = PetscSortIntWithPermutation(local_size,idxs,idxs_perm);CHKERRQ(ierr);
810     for (i=0;i<local_size;i++) {
811       idxs_sorted[i] = idxs[idxs_perm[i]];
812     }
813     ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
814     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
815     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
816   }
817 
818   /* get change of basis on the whole set of local dofs */
819   ierr = MatGetSubMatrices(ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
820 
821   if (!sorted) {
822     Mat      new_mat;
823     IS       is_perm;
824     PetscInt *idxs,i;
825 
826     ierr = PetscMalloc1(local_size,&idxs);CHKERRQ(ierr);
827     for (i=0;i<local_size;i++) {
828       idxs[idxs_perm[i]] = i;
829     }
830     ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
831     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
832     ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
833     ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
834     ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
835     change_mat_all[0] = new_mat;
836     ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
837   }
838 
839   /* check */
840   if (pcbddc->dbg_flag) {
841     Vec       x,x_change;
842     PetscReal error;
843 
844     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
845     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
846     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
847     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
848     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
849     ierr = MatMult(change_mat_all[0],matis->x,matis->y);CHKERRQ(ierr);
850     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
851     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
852     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
853     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
854     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
855     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
856     ierr = VecDestroy(&x);CHKERRQ(ierr);
857     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
858   }
859 
860   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
861   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
862   if (isseqaij) {
863     ierr = MatPtAP(matis->A,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
864   } else {
865     Mat work_mat;
866     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
867     ierr = MatPtAP(work_mat,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
868     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
869   }
870   /*
871   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
872   ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
873   */
874   ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
875   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
881 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
882 {
883   PC_IS*         pcis = (PC_IS*)(pc->data);
884   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
885   IS             is_aux1,is_aux2;
886   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
887   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
888   PetscInt       vbs,bs;
889   PetscBT        bitmask;
890   PetscErrorCode ierr;
891 
892   PetscFunctionBegin;
893   /*
894     No need to setup local scatters if
895       - primal space is unchanged
896         AND
897       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
898         AND
899       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
900   */
901   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
902     PetscFunctionReturn(0);
903   }
904   /* destroy old objects */
905   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
906   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
907   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
908   /* Set Non-overlapping dimensions */
909   n_B = pcis->n_B; n_D = pcis->n - n_B;
910   n_vertices = pcbddc->n_actual_vertices;
911   /* create auxiliary bitmask */
912   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
913   for (i=0;i<n_vertices;i++) {
914     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
915   }
916 
917   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
918   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
919   for (i=0, n_R=0; i<pcis->n; i++) {
920     if (!PetscBTLookup(bitmask,i)) {
921       idx_R_local[n_R] = i;
922       n_R++;
923     }
924   }
925 
926   /* Block code */
927   vbs = 1;
928   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
929   if (bs>1 && !(n_vertices%bs)) {
930     PetscBool is_blocked = PETSC_TRUE;
931     PetscInt  *vary;
932     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
933     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
934     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
935     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
936     for (i=0; i<n_vertices; i++) {
937       if (vary[i]!=0 && vary[i]!=bs) {
938         is_blocked = PETSC_FALSE;
939         break;
940       }
941     }
942     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
943       vbs = bs;
944       for (i=0;i<n_R/vbs;i++) {
945         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
946       }
947     }
948     ierr = PetscFree(vary);CHKERRQ(ierr);
949   }
950   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
951   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
952 
953   /* print some info if requested */
954   if (pcbddc->dbg_flag) {
955     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
956     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
957     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
958     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
959     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
960     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
961     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
962     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
963   }
964 
965   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
966   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
967   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
968   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
969   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
970   for (i=0; i<n_D; i++) {
971     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
972   }
973   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
974   for (i=0, j=0; i<n_R; i++) {
975     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
976       aux_array1[j++] = i;
977     }
978   }
979   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
980   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
981   for (i=0, j=0; i<n_B; i++) {
982     if (!PetscBTLookup(bitmask,is_indices[i])) {
983       aux_array2[j++] = i;
984     }
985   }
986   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
987   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
988   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
989   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
990   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
991 
992   if (pcbddc->switch_static || pcbddc->dbg_flag) {
993     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
994     for (i=0, j=0; i<n_R; i++) {
995       if (PetscBTLookup(bitmask,idx_R_local[i])) {
996         aux_array1[j++] = i;
997       }
998     }
999     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1000     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1001     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1002   }
1003   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1004   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1011 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1012 {
1013   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1014   PC_IS          *pcis = (PC_IS*)pc->data;
1015   PC             pc_temp;
1016   Mat            A_RR;
1017   MatReuse       reuse;
1018   PetscScalar    m_one = -1.0;
1019   PetscReal      value;
1020   PetscInt       n_D,n_R,ibs,mbs;
1021   PetscBool      use_exact,use_exact_reduced,issbaij;
1022   PetscErrorCode ierr;
1023   /* prefixes stuff */
1024   char           dir_prefix[256],neu_prefix[256],str_level[16];
1025   size_t         len;
1026 
1027   PetscFunctionBegin;
1028 
1029   /* compute prefixes */
1030   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1031   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1032   if (!pcbddc->current_level) {
1033     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1034     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1035     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1036     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1037   } else {
1038     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1039     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1040     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1041     len -= 15; /* remove "pc_bddc_coarse_" */
1042     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1043     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1044     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1045     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1046     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1047     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1048     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1049     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1050   }
1051 
1052   /* DIRICHLET PROBLEM */
1053   if (dirichlet) {
1054     /* Matrix for Dirichlet problem is pcis->A_II */
1055     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1056     if (!pcbddc->ksp_D) { /* create object if not yet build */
1057       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1058       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1059       /* default */
1060       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1061       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1062       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1063       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1064       if (issbaij) {
1065         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1066       } else {
1067         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1068       }
1069       /* Allow user's customization */
1070       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1071       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1072     }
1073     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1074     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1075     if (!n_D) {
1076       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1077       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1078     }
1079     /* Set Up KSP for Dirichlet problem of BDDC */
1080     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1081     /* set ksp_D into pcis data */
1082     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1083     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1084     pcis->ksp_D = pcbddc->ksp_D;
1085   }
1086 
1087   /* NEUMANN PROBLEM */
1088   A_RR = 0;
1089   if (neumann) {
1090     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1091     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1092     if (pcbddc->ksp_R) { /* already created ksp */
1093       PetscInt nn_R;
1094       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1095       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1096       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1097       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1098         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1099         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1100         reuse = MAT_INITIAL_MATRIX;
1101       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1102         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1103           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1104           reuse = MAT_INITIAL_MATRIX;
1105         } else { /* safe to reuse the matrix */
1106           reuse = MAT_REUSE_MATRIX;
1107         }
1108       }
1109       /* last check */
1110       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1111         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1112         reuse = MAT_INITIAL_MATRIX;
1113       }
1114     } else { /* first time, so we need to create the matrix */
1115       reuse = MAT_INITIAL_MATRIX;
1116     }
1117     /* extract A_RR */
1118     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1119     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1120     if (ibs != mbs) {
1121       Mat newmat;
1122       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1123       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1124       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1125     } else {
1126       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1127     }
1128     if (!pcbddc->ksp_R) { /* create object if not present */
1129       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1130       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1131       /* default */
1132       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1133       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1134       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1135       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1136       if (issbaij) {
1137         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1138       } else {
1139         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1140       }
1141       /* Allow user's customization */
1142       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1143       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1144     }
1145     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1146     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1147     if (!n_R) {
1148       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1149       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1150     }
1151     /* Set Up KSP for Neumann problem of BDDC */
1152     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1153   }
1154 
1155   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1156   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1157     if (pcbddc->dbg_flag) {
1158       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1159       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1160       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1161     }
1162     if (dirichlet) { /* Dirichlet */
1163       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1164       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1165       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1166       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1167       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1168       /* need to be adapted? */
1169       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1170       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1171       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1172       /* print info */
1173       if (pcbddc->dbg_flag) {
1174         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1175         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1176       }
1177       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1178         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1179       }
1180     }
1181     if (neumann) { /* Neumann */
1182       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1183       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1184       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1185       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1186       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1187       /* need to be adapted? */
1188       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1189       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1190       /* print info */
1191       if (pcbddc->dbg_flag) {
1192         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1193         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1194       }
1195       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1196         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1197       }
1198     }
1199   }
1200   /* free Neumann problem's matrix */
1201   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1207 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1208 {
1209   PetscErrorCode ierr;
1210   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1211 
1212   PetscFunctionBegin;
1213   if (applytranspose) {
1214     if (pcbddc->local_auxmat1) {
1215       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1216       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1217     }
1218     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1219   } else {
1220     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1221     if (pcbddc->local_auxmat1) {
1222       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1223       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1224     }
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1230 #undef __FUNCT__
1231 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1232 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1233 {
1234   PetscErrorCode ierr;
1235   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1236   PC_IS*            pcis = (PC_IS*)  (pc->data);
1237   const PetscScalar zero = 0.0;
1238 
1239   PetscFunctionBegin;
1240   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1241   if (applytranspose) {
1242     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1243     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1244   } else {
1245     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1246     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1247   }
1248   /* start communications from local primal nodes to rhs of coarse solver */
1249   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1250   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1254   /* TODO remove null space when doing multilevel */
1255   if (pcbddc->coarse_ksp) {
1256     if (applytranspose) {
1257       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1258     } else {
1259       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1260     }
1261   }
1262 
1263   /* Local solution on R nodes */
1264   if (pcis->n) {
1265     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1266     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1267     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1268     if (pcbddc->switch_static) {
1269       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1270       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1271     }
1272     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1273     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1274     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1275     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     if (pcbddc->switch_static) {
1277       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1279     }
1280   }
1281 
1282   /* communications from coarse sol to local primal nodes */
1283   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1284   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1285 
1286   /* Sum contributions from two levels */
1287   if (applytranspose) {
1288     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1289     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1290   } else {
1291     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1292     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1298 #undef __FUNCT__
1299 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1300 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1301 {
1302   PetscErrorCode ierr;
1303   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1304   PetscScalar    *array,*array2;
1305   Vec            from,to;
1306 
1307   PetscFunctionBegin;
1308   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1309     from = pcbddc->coarse_vec;
1310     to = pcbddc->vec1_P;
1311     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1312       Vec tvec;
1313       PetscInt lsize;
1314       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1315       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1316       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1317       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1318       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1319       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1320       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1321     }
1322   } else { /* from local to global -> put data in coarse right hand side */
1323     from = pcbddc->vec1_P;
1324     to = pcbddc->coarse_vec;
1325   }
1326   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 #undef __FUNCT__
1331 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1332 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1333 {
1334   PetscErrorCode ierr;
1335   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1336   PetscScalar    *array,*array2;
1337   Vec            from,to;
1338 
1339   PetscFunctionBegin;
1340   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1341     from = pcbddc->coarse_vec;
1342     to = pcbddc->vec1_P;
1343   } else { /* from local to global -> put data in coarse right hand side */
1344     from = pcbddc->vec1_P;
1345     to = pcbddc->coarse_vec;
1346   }
1347   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1348   if (smode == SCATTER_FORWARD) {
1349     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1350       Vec tvec;
1351       PetscInt lsize;
1352       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1353       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1354       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1355       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1356       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1357       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1358       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1359     }
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 /* uncomment for testing purposes */
1365 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1366 #undef __FUNCT__
1367 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1368 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1369 {
1370   PetscErrorCode    ierr;
1371   PC_IS*            pcis = (PC_IS*)(pc->data);
1372   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1373   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1374   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1375   MatType           impMatType=MATSEQAIJ;
1376   /* one and zero */
1377   PetscScalar       one=1.0,zero=0.0;
1378   /* space to store constraints and their local indices */
1379   PetscScalar       *temp_quadrature_constraint;
1380   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1381   /* iterators */
1382   PetscInt          i,j,k,total_counts,temp_start_ptr;
1383   /* stuff to store connected components stored in pcbddc->mat_graph */
1384   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1385   PetscInt          n_ISForFaces,n_ISForEdges;
1386   /* near null space stuff */
1387   MatNullSpace      nearnullsp;
1388   const Vec         *nearnullvecs;
1389   Vec               *localnearnullsp;
1390   PetscBool         nnsp_has_cnst;
1391   PetscInt          nnsp_size;
1392   PetscScalar       *array;
1393   /* BLAS integers */
1394   PetscBLASInt      lwork,lierr;
1395   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1396   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1397   /* LAPACK working arrays for SVD or POD */
1398   PetscBool         skip_lapack;
1399   PetscScalar       *work;
1400   PetscReal         *singular_vals;
1401 #if defined(PETSC_USE_COMPLEX)
1402   PetscReal         *rwork;
1403 #endif
1404 #if defined(PETSC_MISSING_LAPACK_GESVD)
1405   PetscBLASInt      Blas_one_2=1;
1406   PetscScalar       *temp_basis,*correlation_mat;
1407 #else
1408   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1409   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1410 #endif
1411   /* reuse */
1412   PetscInt          olocal_primal_size;
1413   PetscInt          *oprimal_indices_local_idxs;
1414   /* change of basis */
1415   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1416   PetscBool         boolforchange,qr_needed;
1417   PetscBT           touched,change_basis,qr_needed_idx;
1418   /* auxiliary stuff */
1419   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1420   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1421   PetscScalar       *temp_quadrature_constraint_work;
1422   /* some quantities */
1423   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1424   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1425 
1426 
1427   PetscFunctionBegin;
1428   /* Destroy Mat objects computed previously */
1429   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1430   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1431   /* Get index sets for faces, edges and vertices from graph */
1432   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1433   /* free unneeded index sets */
1434   if (!pcbddc->use_vertices) {
1435     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1436   }
1437   if (!pcbddc->use_edges) {
1438     for (i=0;i<n_ISForEdges;i++) {
1439       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1440     }
1441     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1442     n_ISForEdges = 0;
1443   }
1444   if (!pcbddc->use_faces) {
1445     for (i=0;i<n_ISForFaces;i++) {
1446       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1447     }
1448     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1449     n_ISForFaces = 0;
1450   }
1451   /* HACKS (the following two blocks of code) */
1452   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1453     pcbddc->use_change_of_basis = PETSC_TRUE;
1454     if (!ISForEdges) {
1455       pcbddc->use_change_on_faces = PETSC_TRUE;
1456     }
1457   }
1458   if (pcbddc->NullSpace) {
1459     /* use_change_of_basis should be consistent among processors */
1460     PetscBool tbool[2],gbool[2];
1461     tbool [0] = pcbddc->use_change_of_basis;
1462     tbool [1] = pcbddc->use_change_on_faces;
1463     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1464     pcbddc->use_change_of_basis = gbool[0];
1465     pcbddc->use_change_on_faces = gbool[1];
1466   }
1467   /* print some info */
1468   if (pcbddc->dbg_flag) {
1469     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1470     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1471     i = 0;
1472     if (ISForVertices) {
1473       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1474     }
1475     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1476     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1477     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1478     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1479   }
1480   /* check if near null space is attached to global mat */
1481   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1482   if (nearnullsp) {
1483     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1484     /* remove any stored info */
1485     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1486     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1487     /* store information for BDDC solver reuse */
1488     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1489     pcbddc->onearnullspace = nearnullsp;
1490     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1491     for (i=0;i<nnsp_size;i++) {
1492       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1493     }
1494   } else { /* if near null space is not provided BDDC uses constants by default */
1495     nnsp_size = 0;
1496     nnsp_has_cnst = PETSC_TRUE;
1497   }
1498   /* get max number of constraints on a single cc */
1499   max_constraints = nnsp_size;
1500   if (nnsp_has_cnst) max_constraints++;
1501 
1502   /*
1503        Evaluate maximum storage size needed by the procedure
1504        - temp_indices will contain start index of each constraint stored as follows
1505        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1506        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1507        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1508                                                                                                                                                          */
1509   total_counts = n_ISForFaces+n_ISForEdges;
1510   total_counts *= max_constraints;
1511   n_vertices = 0;
1512   if (ISForVertices) {
1513     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1514   }
1515   total_counts += n_vertices;
1516   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1517   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1518   total_counts = 0;
1519   max_size_of_constraint = 0;
1520   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1521     if (i<n_ISForEdges) {
1522       used_IS = &ISForEdges[i];
1523     } else {
1524       used_IS = &ISForFaces[i-n_ISForEdges];
1525     }
1526     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1527     total_counts += j;
1528     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1529   }
1530   total_counts *= max_constraints;
1531   total_counts += n_vertices;
1532   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1533   /* get local part of global near null space vectors */
1534   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1535   for (k=0;k<nnsp_size;k++) {
1536     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1537     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539   }
1540 
1541   /* whether or not to skip lapack calls */
1542   skip_lapack = PETSC_TRUE;
1543   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1544 
1545   /* allocate some auxiliary stuff */
1546   if (!skip_lapack || pcbddc->use_qr_single) {
1547     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1548   }
1549 
1550   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1551   if (!skip_lapack) {
1552     PetscScalar temp_work;
1553 
1554 #if defined(PETSC_MISSING_LAPACK_GESVD)
1555     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1556     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1557     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1558     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1559 #if defined(PETSC_USE_COMPLEX)
1560     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1561 #endif
1562     /* now we evaluate the optimal workspace using query with lwork=-1 */
1563     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1564     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1565     lwork = -1;
1566     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1567 #if !defined(PETSC_USE_COMPLEX)
1568     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1569 #else
1570     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1571 #endif
1572     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1573     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1574 #else /* on missing GESVD */
1575     /* SVD */
1576     PetscInt max_n,min_n;
1577     max_n = max_size_of_constraint;
1578     min_n = max_constraints;
1579     if (max_size_of_constraint < max_constraints) {
1580       min_n = max_size_of_constraint;
1581       max_n = max_constraints;
1582     }
1583     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1584 #if defined(PETSC_USE_COMPLEX)
1585     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1586 #endif
1587     /* now we evaluate the optimal workspace using query with lwork=-1 */
1588     lwork = -1;
1589     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1590     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1591     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1592     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1593 #if !defined(PETSC_USE_COMPLEX)
1594     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1595 #else
1596     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1597 #endif
1598     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1599     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1600 #endif /* on missing GESVD */
1601     /* Allocate optimal workspace */
1602     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1603     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1604   }
1605   /* Now we can loop on constraining sets */
1606   total_counts = 0;
1607   temp_indices[0] = 0;
1608   /* vertices */
1609   if (ISForVertices) {
1610     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1611     if (nnsp_has_cnst) { /* consider all vertices */
1612       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1613       for (i=0;i<n_vertices;i++) {
1614         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1615         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1616         total_counts++;
1617       }
1618     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1619       PetscBool used_vertex;
1620       for (i=0;i<n_vertices;i++) {
1621         used_vertex = PETSC_FALSE;
1622         k = 0;
1623         while (!used_vertex && k<nnsp_size) {
1624           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1625           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1626             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1627             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1628             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1629             total_counts++;
1630             used_vertex = PETSC_TRUE;
1631           }
1632           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1633           k++;
1634         }
1635       }
1636     }
1637     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1638     n_vertices = total_counts;
1639   }
1640 
1641   /* edges and faces */
1642   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1643     if (ncc<n_ISForEdges) {
1644       used_IS = &ISForEdges[ncc];
1645       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1646     } else {
1647       used_IS = &ISForFaces[ncc-n_ISForEdges];
1648       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1649     }
1650     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1651     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1652     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1653     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1654     /* change of basis should not be performed on local periodic nodes */
1655     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1656     if (nnsp_has_cnst) {
1657       PetscScalar quad_value;
1658       temp_constraints++;
1659       if (!pcbddc->use_nnsp_true) {
1660         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1661       } else {
1662         quad_value = 1.0;
1663       }
1664       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1665       for (j=0;j<size_of_constraint;j++) {
1666         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1667       }
1668       /* sort by global ordering if using lapack subroutines */
1669       if (!skip_lapack || pcbddc->use_qr_single) {
1670         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1671         for (j=0;j<size_of_constraint;j++) {
1672           permutation[j]=j;
1673         }
1674         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1675         for (j=0;j<size_of_constraint;j++) {
1676           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1677           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1678         }
1679         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1680         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1681       }
1682       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1683       total_counts++;
1684     }
1685     for (k=0;k<nnsp_size;k++) {
1686       PetscReal real_value;
1687       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1688       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1689       for (j=0;j<size_of_constraint;j++) {
1690         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1691       }
1692       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1693       /* check if array is null on the connected component */
1694       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1695       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1696       if (real_value > 0.0) { /* keep indices and values */
1697         /* sort by global ordering if using lapack subroutines */
1698         if (!skip_lapack || pcbddc->use_qr_single) {
1699           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1700           for (j=0;j<size_of_constraint;j++) {
1701             permutation[j]=j;
1702           }
1703           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1704           for (j=0;j<size_of_constraint;j++) {
1705             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1706             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1707           }
1708           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1709           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1710         }
1711         temp_constraints++;
1712         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1713         total_counts++;
1714       }
1715     }
1716     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1717     valid_constraints = temp_constraints;
1718     if (!pcbddc->use_nnsp_true && temp_constraints) {
1719       if (temp_constraints == 1) { /* just normalize the constraint */
1720         PetscScalar norm;
1721         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1722         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1723         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1724         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1725       } else { /* perform SVD */
1726         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1727 
1728 #if defined(PETSC_MISSING_LAPACK_GESVD)
1729         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1730            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1731            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1732               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1733               from that computed using LAPACKgesvd
1734            -> This is due to a different computation of eigenvectors in LAPACKheev
1735            -> The quality of the POD-computed basis will be the same */
1736         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1737         /* Store upper triangular part of correlation matrix */
1738         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1739         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1740         for (j=0;j<temp_constraints;j++) {
1741           for (k=0;k<j+1;k++) {
1742             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1743           }
1744         }
1745         /* compute eigenvalues and eigenvectors of correlation matrix */
1746         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1747         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1748 #if !defined(PETSC_USE_COMPLEX)
1749         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1750 #else
1751         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1752 #endif
1753         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1754         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1755         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1756         j = 0;
1757         while (j < temp_constraints && singular_vals[j] < tol) j++;
1758         total_counts = total_counts-j;
1759         valid_constraints = temp_constraints-j;
1760         /* scale and copy POD basis into used quadrature memory */
1761         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1762         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1763         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1764         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1765         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1766         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1767         if (j<temp_constraints) {
1768           PetscInt ii;
1769           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1770           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1771           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1772           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1773           for (k=0;k<temp_constraints-j;k++) {
1774             for (ii=0;ii<size_of_constraint;ii++) {
1775               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1776             }
1777           }
1778         }
1779 #else  /* on missing GESVD */
1780         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1781         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1782         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1783         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1784 #if !defined(PETSC_USE_COMPLEX)
1785         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1786 #else
1787         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1788 #endif
1789         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1790         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1791         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1792         k = temp_constraints;
1793         if (k > size_of_constraint) k = size_of_constraint;
1794         j = 0;
1795         while (j < k && singular_vals[k-j-1] < tol) j++;
1796         valid_constraints = k-j;
1797         total_counts = total_counts-temp_constraints+valid_constraints;
1798 #endif /* on missing GESVD */
1799       }
1800     }
1801     /* setting change_of_basis flag is safe now */
1802     if (boolforchange) {
1803       for (j=0;j<valid_constraints;j++) {
1804         PetscBTSet(change_basis,total_counts-j-1);
1805       }
1806     }
1807   }
1808   /* free index sets of faces, edges and vertices */
1809   for (i=0;i<n_ISForFaces;i++) {
1810     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1811   }
1812   if (n_ISForFaces) {
1813     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1814   }
1815   for (i=0;i<n_ISForEdges;i++) {
1816     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1817   }
1818   if (n_ISForEdges) {
1819     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1820   }
1821   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1822   /* map temp_indices_to_constraint in boundary numbering */
1823   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1824   if (i != temp_indices[total_counts]) {
1825     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1826   }
1827 
1828   /* free workspace */
1829   if (!skip_lapack || pcbddc->use_qr_single) {
1830     ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1831   }
1832   if (!skip_lapack) {
1833     ierr = PetscFree(work);CHKERRQ(ierr);
1834 #if defined(PETSC_USE_COMPLEX)
1835     ierr = PetscFree(rwork);CHKERRQ(ierr);
1836 #endif
1837     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1838 #if defined(PETSC_MISSING_LAPACK_GESVD)
1839     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1840     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1841 #endif
1842   }
1843   for (k=0;k<nnsp_size;k++) {
1844     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1845   }
1846   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1847 
1848   /* set quantities in pcbddc data structure and store previous primal size */
1849   /* n_vertices defines the number of subdomain corners in the primal space */
1850   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1851   olocal_primal_size = pcbddc->local_primal_size;
1852   pcbddc->local_primal_size = total_counts;
1853   pcbddc->n_vertices = n_vertices;
1854   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1855 
1856   /* Create constraint matrix */
1857   /* The constraint matrix is used to compute the l2g map of primal dofs */
1858   /* so we need to set it up properly either with or without change of basis */
1859   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1860   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1861   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1862   /* array to compute a local numbering of constraints : vertices first then constraints */
1863   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1864   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1865   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1866   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1867   /* auxiliary stuff for basis change */
1868   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1869   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1870 
1871   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1872   total_primal_vertices=0;
1873   for (i=0;i<pcbddc->local_primal_size;i++) {
1874     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1875     if (size_of_constraint == 1) {
1876       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1877       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1878       aux_primal_minloc[total_primal_vertices]=0;
1879       total_primal_vertices++;
1880     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1881       PetscInt min_loc,min_index;
1882       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1883       /* find first untouched local node */
1884       k = 0;
1885       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1886       min_index = global_indices[k];
1887       min_loc = k;
1888       /* search the minimum among global nodes already untouched on the cc */
1889       for (k=1;k<size_of_constraint;k++) {
1890         /* there can be more than one constraint on a single connected component */
1891         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1892           min_index = global_indices[k];
1893           min_loc = k;
1894         }
1895       }
1896       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1897       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1898       aux_primal_minloc[total_primal_vertices]=min_loc;
1899       total_primal_vertices++;
1900     }
1901   }
1902   /* determine if a QR strategy is needed for change of basis */
1903   qr_needed = PETSC_FALSE;
1904   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1905   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1906     if (PetscBTLookup(change_basis,i)) {
1907       if (!pcbddc->use_qr_single) {
1908         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1909         j = 0;
1910         for (k=0;k<size_of_constraint;k++) {
1911           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1912             j++;
1913           }
1914         }
1915         /* found more than one primal dof on the cc */
1916         if (j > 1) {
1917           PetscBTSet(qr_needed_idx,i);
1918           qr_needed = PETSC_TRUE;
1919         }
1920       } else {
1921         PetscBTSet(qr_needed_idx,i);
1922         qr_needed = PETSC_TRUE;
1923       }
1924     }
1925   }
1926   /* free workspace */
1927   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1928 
1929   /* permute indices in order to have a sorted set of vertices */
1930   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1931 
1932   /* nonzero structure of constraint matrix */
1933   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1934   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1935   j=total_primal_vertices;
1936   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1937     if (!PetscBTLookup(change_basis,i)) {
1938       nnz[j]=temp_indices[i+1]-temp_indices[i];
1939       j++;
1940     }
1941   }
1942   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1943   ierr = PetscFree(nnz);CHKERRQ(ierr);
1944   /* set values in constraint matrix */
1945   for (i=0;i<total_primal_vertices;i++) {
1946     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1947   }
1948   total_counts = total_primal_vertices;
1949   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1950     if (!PetscBTLookup(change_basis,i)) {
1951       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1952       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1953       total_counts++;
1954     }
1955   }
1956   /* assembling */
1957   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1959   /*
1960   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1961   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1962   */
1963   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1964   if (pcbddc->use_change_of_basis) {
1965     /* dual and primal dofs on a single cc */
1966     PetscInt     dual_dofs,primal_dofs;
1967     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1968     PetscInt     primal_counter;
1969     /* working stuff for GEQRF */
1970     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1971     PetscBLASInt lqr_work;
1972     /* working stuff for UNGQR */
1973     PetscScalar  *gqr_work,lgqr_work_t;
1974     PetscBLASInt lgqr_work;
1975     /* working stuff for TRTRS */
1976     PetscScalar  *trs_rhs;
1977     PetscBLASInt Blas_NRHS;
1978     /* pointers for values insertion into change of basis matrix */
1979     PetscInt     *start_rows,*start_cols;
1980     PetscScalar  *start_vals;
1981     /* working stuff for values insertion */
1982     PetscBT      is_primal;
1983     /* matrix sizes */
1984     PetscInt     global_size,local_size;
1985     /* work array for nonzeros */
1986     PetscScalar  *nnz_array;
1987     /* temporary change of basis */
1988     Mat          localChangeOfBasisMatrix;
1989     /* auxiliary work for global change of basis */
1990     Vec          nnz_vec;
1991     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
1992     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
1993     PetscScalar  *vals;
1994     PetscBool    done;
1995 
1996     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
1997     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
1998     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1999     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2000 
2001     /* nonzeros for local mat */
2002     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2003     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2004     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2005       if (PetscBTLookup(change_basis,i)) {
2006         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2007         if (PetscBTLookup(qr_needed_idx,i)) {
2008           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2009         } else {
2010           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2011           /* get local primal index on the cc */
2012           j = 0;
2013           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2014           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2015         }
2016       }
2017     }
2018     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2019     /* Set initial identity in the matrix */
2020     for (i=0;i<pcis->n_B;i++) {
2021       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2022     }
2023 
2024     if (pcbddc->dbg_flag) {
2025       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2026       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2027     }
2028 
2029 
2030     /* Now we loop on the constraints which need a change of basis */
2031     /*
2032        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2033        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2034 
2035        Basic blocks of change of basis matrix T computed by
2036 
2037           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2038 
2039             | 1        0   ...        0         s_1/S |
2040             | 0        1   ...        0         s_2/S |
2041             |              ...                        |
2042             | 0        ...            1     s_{n-1}/S |
2043             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2044 
2045             with S = \sum_{i=1}^n s_i^2
2046             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2047                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2048 
2049           - QR decomposition of constraints otherwise
2050     */
2051     if (qr_needed) {
2052       /* space to store Q */
2053       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2054       /* first we issue queries for optimal work */
2055       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2056       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2057       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2058       lqr_work = -1;
2059       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2060       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2061       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2062       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2063       lgqr_work = -1;
2064       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2065       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2066       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2067       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2068       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2069       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2070       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2071       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2072       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2073       /* array to store scaling factors for reflectors */
2074       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2075       /* array to store rhs and solution of triangular solver */
2076       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2077       /* allocating workspace for check */
2078       if (pcbddc->dbg_flag) {
2079         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2080       }
2081     }
2082     /* array to store whether a node is primal or not */
2083     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2084     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2085     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2086     if (i != total_primal_vertices) {
2087       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2088     }
2089     for (i=0;i<total_primal_vertices;i++) {
2090       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2091     }
2092     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2093 
2094     /* loop on constraints and see whether or not they need a change of basis and compute it */
2095     /* -> using implicit ordering contained in temp_indices data */
2096     total_counts = pcbddc->n_vertices;
2097     primal_counter = total_counts;
2098     while (total_counts<pcbddc->local_primal_size) {
2099       primal_dofs = 1;
2100       if (PetscBTLookup(change_basis,total_counts)) {
2101         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2102         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2103           primal_dofs++;
2104         }
2105         /* get constraint info */
2106         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2107         dual_dofs = size_of_constraint-primal_dofs;
2108 
2109         if (pcbddc->dbg_flag) {
2110           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2111         }
2112 
2113         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2114 
2115           /* copy quadrature constraints for change of basis check */
2116           if (pcbddc->dbg_flag) {
2117             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2118           }
2119           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2120           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2121 
2122           /* compute QR decomposition of constraints */
2123           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2124           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2125           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2126           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2127           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2128           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2129           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2130 
2131           /* explictly compute R^-T */
2132           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2133           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2134           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2136           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2137           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2138           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2139           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2140           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2141           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2142 
2143           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2144           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2145           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2146           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2147           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2148           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2149           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2150           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2151           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2152 
2153           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2154              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2155              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2156           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2157           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2158           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2159           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2160           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2161           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2162           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2163           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2164           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2165           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2166 
2167           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2168           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2169           /* insert cols for primal dofs */
2170           for (j=0;j<primal_dofs;j++) {
2171             start_vals = &qr_basis[j*size_of_constraint];
2172             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2173             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2174           }
2175           /* insert cols for dual dofs */
2176           for (j=0,k=0;j<dual_dofs;k++) {
2177             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2178               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2179               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2180               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2181               j++;
2182             }
2183           }
2184 
2185           /* check change of basis */
2186           if (pcbddc->dbg_flag) {
2187             PetscInt   ii,jj;
2188             PetscBool valid_qr=PETSC_TRUE;
2189             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2190             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2191             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2192             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2193             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2194             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2195             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2196             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2197             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2198             for (jj=0;jj<size_of_constraint;jj++) {
2199               for (ii=0;ii<primal_dofs;ii++) {
2200                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2201                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2202               }
2203             }
2204             if (!valid_qr) {
2205               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2206               for (jj=0;jj<size_of_constraint;jj++) {
2207                 for (ii=0;ii<primal_dofs;ii++) {
2208                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2209                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2210                   }
2211                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2212                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2213                   }
2214                 }
2215               }
2216             } else {
2217               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2218             }
2219           }
2220         } else { /* simple transformation block */
2221           PetscInt    row,col;
2222           PetscScalar val,norm;
2223 
2224           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2225           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2226           for (j=0;j<size_of_constraint;j++) {
2227             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2228             if (!PetscBTLookup(is_primal,row)) {
2229               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2230               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2231               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2232             } else {
2233               for (k=0;k<size_of_constraint;k++) {
2234                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2235                 if (row != col) {
2236                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2237                 } else {
2238                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2239                 }
2240                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2241               }
2242             }
2243           }
2244           if (pcbddc->dbg_flag) {
2245             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2246           }
2247         }
2248         /* increment primal counter */
2249         primal_counter += primal_dofs;
2250       } else {
2251         if (pcbddc->dbg_flag) {
2252           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2253         }
2254       }
2255       /* increment constraint counter total_counts */
2256       total_counts += primal_dofs;
2257     }
2258 
2259     /* free workspace */
2260     if (qr_needed) {
2261       if (pcbddc->dbg_flag) {
2262         ierr = PetscFree(work);CHKERRQ(ierr);
2263       }
2264       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2265       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2266       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2267       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2268       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2269     }
2270     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2271     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273 
2274     /* assembling of global change of variable */
2275     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2276     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2277     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2278     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2279     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2280     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2281 
2282     /* nonzeros (overestimated) */
2283     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2284     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2285     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2286     for (i=0;i<pcis->n;i++) {
2287       nnz_array[i] = 1.0;
2288       idxs_all[i] = i;
2289     }
2290     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2291     for (i=0;i<pcis->n_B;i++) {
2292       nnz_array[idxs_B[i]] = nnz[i];
2293     }
2294     if (pcis->n) {
2295       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2296     }
2297     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2298     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2299     ierr = PetscFree(nnz);CHKERRQ(ierr);
2300     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2301     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2302     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2303     for (i=0;i<local_size;i++) {
2304       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2305       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2306     }
2307     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2308     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2309     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2310     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2311 
2312     /* Set identity on dirichlet dofs */
2313     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2314     for (i=0;i<pcis->n-pcis->n_B;i++) {
2315       PetscScalar one=1.0;
2316       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2317     }
2318     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2319 
2320     /* Set values at interface dofs */
2321     done = PETSC_TRUE;
2322     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2323     if (!done) {
2324       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2325     }
2326     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2327     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2328     ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2329     for (i=0;i<nvtxs;i++) {
2330       PetscInt    row,*cols,ncols;
2331       PetscScalar *mat_vals;
2332 
2333       row = idxs_B[i];
2334       ncols = xadj[i+1]-xadj[i];
2335       cols = idxs_mapped+xadj[i];
2336       mat_vals = vals+xadj[i];
2337       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2338     }
2339     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2340     if (!done) {
2341       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2342     }
2343     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2344     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2345     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2346     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2347     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2348 
2349     /* check */
2350     if (pcbddc->dbg_flag) {
2351       PetscReal error;
2352       Vec       x,x_change;
2353 
2354       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2355       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2356       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2357       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2358       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2359       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2360       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2361       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2362       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2363       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2364       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2365       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2366       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2367       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2368       ierr = VecDestroy(&x);CHKERRQ(ierr);
2369       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2370     }
2371     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2372   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2373     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2374     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2375   }
2376 
2377   /* set up change of basis context */
2378   if (pcbddc->ChangeOfBasisMatrix) {
2379     PCBDDCChange_ctx change_ctx;
2380 
2381     if (!pcbddc->new_global_mat) {
2382       PetscInt global_size,local_size;
2383 
2384       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2385       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2386       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2387       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2388       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2389       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2390       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2391       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2392       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2393     } else {
2394       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2395       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2396       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2397     }
2398     if (!pcbddc->user_ChangeOfBasisMatrix) {
2399       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2400       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2401     } else {
2402       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2403       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2404     }
2405     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2406     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2407     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2408     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   }
2410 
2411   /* get indices in local ordering for vertices and constraints */
2412   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2413     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2414     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2415   }
2416   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2417   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2418   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2419   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2420   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2421   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2422   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2423   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2424   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2425   /* set quantities in PCBDDC data struct */
2426   pcbddc->n_actual_vertices = i;
2427   /* check if a new primal space has been introduced */
2428   pcbddc->new_primal_space_local = PETSC_TRUE;
2429   if (olocal_primal_size == pcbddc->local_primal_size) {
2430     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2431     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2432     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2433   }
2434   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2435   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2436 
2437   /* flush dbg viewer */
2438   if (pcbddc->dbg_flag) {
2439     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2440   }
2441 
2442   /* free workspace */
2443   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2444   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2445   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2446   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2447   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2448   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2454 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2455 {
2456   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2457   PC_IS       *pcis = (PC_IS*)pc->data;
2458   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2459   PetscInt    ierr,i,vertex_size;
2460   PetscViewer viewer=pcbddc->dbg_viewer;
2461 
2462   PetscFunctionBegin;
2463   /* Reset previously computed graph */
2464   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2465   /* Init local Graph struct */
2466   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2467 
2468   /* Check validity of the csr graph passed in by the user */
2469   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2470     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2471   }
2472 
2473   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2474   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2475     Mat       mat_adj;
2476     PetscInt  *xadj,*adjncy;
2477     PetscInt  nvtxs;
2478     PetscBool flg_row=PETSC_TRUE;
2479 
2480     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2481     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2482     if (!flg_row) {
2483       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2484     }
2485     if (pcbddc->use_local_adj) {
2486       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2487       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2488     } else { /* just compute subdomain's connected components */
2489       IS                     is_dummy;
2490       ISLocalToGlobalMapping l2gmap_dummy;
2491       PetscInt               j,sum;
2492       PetscInt               *cxadj,*cadjncy;
2493       const PetscInt         *idxs;
2494       PCBDDCGraph            graph;
2495       PetscBT                is_on_boundary;
2496 
2497       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2498       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2499       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2500       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2501       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2502       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2503       graph->xadj = xadj;
2504       graph->adjncy = adjncy;
2505       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2506       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2507 
2508       if (pcbddc->dbg_flag) {
2509         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2510         for (i=0;i<graph->ncc;i++) {
2511           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2512         }
2513         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2514       }
2515 
2516       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2517       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2518       for (i=0;i<pcis->n_B;i++) {
2519         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2520       }
2521       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2522 
2523       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2524       sum = 0;
2525       for (i=0;i<graph->ncc;i++) {
2526         PetscInt sizecc = 0;
2527         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2528           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2529             sizecc++;
2530           }
2531         }
2532         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2533           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2534             cxadj[graph->queue[j]] = sizecc;
2535           }
2536         }
2537         sum += sizecc*sizecc;
2538       }
2539       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2540       sum = 0;
2541       for (i=0;i<nvtxs;i++) {
2542         PetscInt temp = cxadj[i];
2543         cxadj[i] = sum;
2544         sum += temp;
2545       }
2546       cxadj[nvtxs] = sum;
2547       for (i=0;i<graph->ncc;i++) {
2548         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2549           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2550             PetscInt k,sizecc = 0;
2551             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2552               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2553                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2554                 sizecc++;
2555               }
2556             }
2557           }
2558         }
2559       }
2560       if (nvtxs) {
2561         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2562       } else {
2563         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2564         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2565       }
2566       graph->xadj = 0;
2567       graph->adjncy = 0;
2568       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2569       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2570     }
2571     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2572     if (!flg_row) {
2573       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2574     }
2575     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2576   }
2577 
2578   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2579   vertex_size = 1;
2580   if (pcbddc->user_provided_isfordofs) {
2581     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2582       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2583       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2584         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2585         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2586       }
2587       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2588       pcbddc->n_ISForDofs = 0;
2589       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2590     }
2591     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2592     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2593   } else {
2594     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2595       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2596       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2597       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2598         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2599       }
2600     }
2601   }
2602 
2603   /* Setup of Graph */
2604   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2605     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2606   }
2607   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2608     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2609   }
2610   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2611 
2612   /* Graph's connected components analysis */
2613   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2614 
2615   /* print some info to stdout */
2616   if (pcbddc->dbg_flag) {
2617     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2618   }
2619 
2620   /* mark topography has done */
2621   pcbddc->recompute_topography = PETSC_FALSE;
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 #undef __FUNCT__
2626 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2627 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2628 {
2629   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2630   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2631   PetscErrorCode ierr;
2632 
2633   PetscFunctionBegin;
2634   n = 0;
2635   vertices = 0;
2636   if (pcbddc->ConstraintMatrix) {
2637     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2638     for (i=0;i<local_primal_size;i++) {
2639       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2640       if (size_of_constraint == 1) n++;
2641       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2642     }
2643     if (vertices_idx) {
2644       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2645       n = 0;
2646       for (i=0;i<local_primal_size;i++) {
2647         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2648         if (size_of_constraint == 1) {
2649           vertices[n++]=row_cmat_indices[0];
2650         }
2651         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2652       }
2653     }
2654   }
2655   *n_vertices = n;
2656   if (vertices_idx) *vertices_idx = vertices;
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 #undef __FUNCT__
2661 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2662 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2663 {
2664   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2665   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2666   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2667   PetscBT        touched;
2668   PetscErrorCode ierr;
2669 
2670     /* This function assumes that the number of local constraints per connected component
2671        is not greater than the number of nodes defined for the connected component
2672        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2673   PetscFunctionBegin;
2674   n = 0;
2675   constraints_index = 0;
2676   if (pcbddc->ConstraintMatrix) {
2677     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2678     max_size_of_constraint = 0;
2679     for (i=0;i<local_primal_size;i++) {
2680       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2681       if (size_of_constraint > 1) {
2682         n++;
2683       }
2684       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2685       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2686     }
2687     if (constraints_idx) {
2688       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2689       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2690       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2691       n = 0;
2692       for (i=0;i<local_primal_size;i++) {
2693         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2694         if (size_of_constraint > 1) {
2695           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2696           /* find first untouched local node */
2697           j = 0;
2698           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2699           min_index = row_cmat_global_indices[j];
2700           min_loc = j;
2701           /* search the minimum among nodes not yet touched on the connected component
2702              since there can be more than one constraint on a single cc */
2703           for (j=1;j<size_of_constraint;j++) {
2704             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2705               min_index = row_cmat_global_indices[j];
2706               min_loc = j;
2707             }
2708           }
2709           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2710           constraints_index[n++] = row_cmat_indices[min_loc];
2711         }
2712         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2713       }
2714       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2715       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2716     }
2717   }
2718   *n_constraints = n;
2719   if (constraints_idx) *constraints_idx = constraints_index;
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "PCBDDCSubsetNumbering"
2725 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2726 {
2727   Vec            local_vec,global_vec;
2728   IS             seqis,paris;
2729   VecScatter     scatter_ctx;
2730   PetscScalar    *array;
2731   PetscInt       *temp_global_dofs;
2732   PetscScalar    globalsum;
2733   PetscInt       i,j,s;
2734   PetscInt       nlocals,first_index,old_index,max_local;
2735   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2736   PetscMPIInt    *dof_sizes,*dof_displs;
2737   PetscBool      first_found;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   /* mpi buffers */
2742   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2743   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2744   j = ( !rank_prec_comm ? size_prec_comm : 0);
2745   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2746   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2747   /* get maximum size of subset */
2748   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2749   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2750   max_local = 0;
2751   for (i=0;i<n_local_dofs;i++) {
2752     if (max_local < temp_global_dofs[i] ) {
2753       max_local = temp_global_dofs[i];
2754     }
2755   }
2756   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2757   max_global++;
2758   max_local = 0;
2759   for (i=0;i<n_local_dofs;i++) {
2760     if (max_local < local_dofs[i] ) {
2761       max_local = local_dofs[i];
2762     }
2763   }
2764   max_local++;
2765   /* allocate workspace */
2766   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2767   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2768   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2769   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2770   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2771   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2772   /* create scatter */
2773   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2774   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2775   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2776   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2777   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2778   /* init array */
2779   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2780   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2781   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2782   if (local_dofs_mult) {
2783     for (i=0;i<n_local_dofs;i++) {
2784       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2785     }
2786   } else {
2787     for (i=0;i<n_local_dofs;i++) {
2788       array[local_dofs[i]]=1.0;
2789     }
2790   }
2791   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2792   /* scatter into global vec and get total number of global dofs */
2793   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2794   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2795   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2796   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2797   /* Fill global_vec with cumulative function for global numbering */
2798   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2799   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2800   nlocals = 0;
2801   first_index = -1;
2802   first_found = PETSC_FALSE;
2803   for (i=0;i<s;i++) {
2804     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2805       first_found = PETSC_TRUE;
2806       first_index = i;
2807     }
2808     nlocals += (PetscInt)PetscRealPart(array[i]);
2809   }
2810   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   if (!rank_prec_comm) {
2812     dof_displs[0]=0;
2813     for (i=1;i<size_prec_comm;i++) {
2814       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2815     }
2816   }
2817   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2818   if (first_found) {
2819     array[first_index] += (PetscScalar)nlocals;
2820     old_index = first_index;
2821     for (i=first_index+1;i<s;i++) {
2822       if (PetscRealPart(array[i]) > 0.1) {
2823         array[i] += array[old_index];
2824         old_index = i;
2825       }
2826     }
2827   }
2828   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2829   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2830   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2831   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2832   /* get global ordering of local dofs */
2833   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2834   if (local_dofs_mult) {
2835     for (i=0;i<n_local_dofs;i++) {
2836       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2837     }
2838   } else {
2839     for (i=0;i<n_local_dofs;i++) {
2840       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2841     }
2842   }
2843   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2844   /* free workspace */
2845   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2846   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2847   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2848   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2849   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2850   /* return pointer to global ordering of local dofs */
2851   *global_numbering_subset = temp_global_dofs;
2852   PetscFunctionReturn(0);
2853 }
2854 
2855 #undef __FUNCT__
2856 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2857 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2858 {
2859   PetscInt       i,j;
2860   PetscScalar    *alphas;
2861   PetscErrorCode ierr;
2862 
2863   PetscFunctionBegin;
2864   /* this implements stabilized Gram-Schmidt */
2865   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2866   for (i=0;i<n;i++) {
2867     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2868     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2869     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2870   }
2871   ierr = PetscFree(alphas);CHKERRQ(ierr);
2872   PetscFunctionReturn(0);
2873 }
2874 
2875 #undef __FUNCT__
2876 #define __FUNCT__ "MatISGetSubassemblingPattern"
2877 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2878 {
2879   Mat             subdomain_adj;
2880   IS              new_ranks,ranks_send_to;
2881   MatPartitioning partitioner;
2882   Mat_IS          *matis;
2883   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2884   PetscInt        prank;
2885   PetscMPIInt     size,rank,color;
2886   PetscInt        *xadj,*adjncy,*oldranks;
2887   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2888   PetscInt        i,local_size,threshold=0;
2889   PetscErrorCode  ierr;
2890   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2891   PetscSubcomm    subcomm;
2892 
2893   PetscFunctionBegin;
2894   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2895   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2896   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2897 
2898   /* Get info on mapping */
2899   matis = (Mat_IS*)(mat->data);
2900   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2901   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2902 
2903   /* build local CSR graph of subdomains' connectivity */
2904   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2905   xadj[0] = 0;
2906   xadj[1] = PetscMax(n_neighs-1,0);
2907   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2908   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2909 
2910   if (threshold) {
2911     PetscInt xadj_count = 0;
2912     for (i=1;i<n_neighs;i++) {
2913       if (n_shared[i] > threshold) {
2914         adjncy[xadj_count] = neighs[i];
2915         adjncy_wgt[xadj_count] = n_shared[i];
2916         xadj_count++;
2917       }
2918     }
2919     xadj[1] = xadj_count;
2920   } else {
2921     if (xadj[1]) {
2922       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2923       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2924     }
2925   }
2926   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2927   if (use_square) {
2928     for (i=0;i<xadj[1];i++) {
2929       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2930     }
2931   }
2932   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2933 
2934   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2935 
2936   /*
2937     Restrict work on active processes only.
2938   */
2939   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2940   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2941   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2942   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2943   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2944   if (color) {
2945     ierr = PetscFree(xadj);CHKERRQ(ierr);
2946     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2947     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2948   } else {
2949     PetscInt coarsening_ratio;
2950     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2951     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2952     prank = rank;
2953     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2954     /*
2955     for (i=0;i<size;i++) {
2956       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2957     }
2958     */
2959     for (i=0;i<xadj[1];i++) {
2960       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2961     }
2962     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2963     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2964     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2965 
2966     /* Partition */
2967     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2968     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2969     if (use_vwgt) {
2970       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
2971       v_wgt[0] = local_size;
2972       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2973     }
2974     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2975     coarsening_ratio = size/n_subdomains;
2976     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2977     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2978     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2979     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2980 
2981     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2982     if (contiguous) {
2983       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2984     } else {
2985       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2986     }
2987     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2988     /* clean up */
2989     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2990     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2991     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2992     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2993   }
2994   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2995 
2996   /* assemble parallel IS for sends */
2997   i = 1;
2998   if (color) i=0;
2999   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3000 
3001   /* get back IS */
3002   *is_sends = ranks_send_to;
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "MatISSubassemble"
3010 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3011 {
3012   Mat                    local_mat;
3013   Mat_IS                 *matis;
3014   IS                     is_sends_internal;
3015   PetscInt               rows,cols,new_local_rows;
3016   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3017   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3018   ISLocalToGlobalMapping l2gmap;
3019   PetscInt*              l2gmap_indices;
3020   const PetscInt*        is_indices;
3021   MatType                new_local_type;
3022   /* buffers */
3023   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3024   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3025   PetscInt               *recv_buffer_idxs_local;
3026   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3027   /* MPI */
3028   MPI_Comm               comm,comm_n;
3029   PetscSubcomm           subcomm;
3030   PetscMPIInt            n_sends,n_recvs,commsize;
3031   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3032   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3033   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3034   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3035   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3036   PetscErrorCode         ierr;
3037 
3038   PetscFunctionBegin;
3039   /* TODO: add missing checks */
3040   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3041   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3042   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3043   PetscValidLogicalCollectiveInt(mat,nis,7);
3044   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3045   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3046   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3047   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3048   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3049   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3050   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3051   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3052     PetscInt mrows,mcols,mnrows,mncols;
3053     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3054     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3055     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3056     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3057     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3058     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3059   }
3060   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3061   PetscValidLogicalCollectiveInt(mat,bs,0);
3062   /* prepare IS for sending if not provided */
3063   if (!is_sends) {
3064     PetscBool pcontig = PETSC_TRUE;
3065     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3066     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3067   } else {
3068     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3069     is_sends_internal = is_sends;
3070   }
3071 
3072   /* get pointer of MATIS data */
3073   matis = (Mat_IS*)mat->data;
3074 
3075   /* get comm */
3076   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3077 
3078   /* compute number of sends */
3079   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3080   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3081 
3082   /* compute number of receives */
3083   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3084   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3085   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3086   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3087   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3088   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3089   ierr = PetscFree(iflags);CHKERRQ(ierr);
3090 
3091   /* restrict comm if requested */
3092   subcomm = 0;
3093   destroy_mat = PETSC_FALSE;
3094   if (restrict_comm) {
3095     PetscMPIInt color,rank,subcommsize;
3096     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3097     color = 0;
3098     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3099     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3100     subcommsize = commsize - subcommsize;
3101     /* check if reuse has been requested */
3102     if (reuse == MAT_REUSE_MATRIX) {
3103       if (*mat_n) {
3104         PetscMPIInt subcommsize2;
3105         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3106         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3107         comm_n = PetscObjectComm((PetscObject)*mat_n);
3108       } else {
3109         comm_n = PETSC_COMM_SELF;
3110       }
3111     } else { /* MAT_INITIAL_MATRIX */
3112       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3113       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3114       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3115       comm_n = subcomm->comm;
3116     }
3117     /* flag to destroy *mat_n if not significative */
3118     if (color) destroy_mat = PETSC_TRUE;
3119   } else {
3120     comm_n = comm;
3121   }
3122 
3123   /* prepare send/receive buffers */
3124   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3125   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3126   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3127   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3128   if (nis) {
3129     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3130   }
3131 
3132   /* Get data from local matrices */
3133   if (!isdense) {
3134     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3135     /* TODO: See below some guidelines on how to prepare the local buffers */
3136     /*
3137        send_buffer_vals should contain the raw values of the local matrix
3138        send_buffer_idxs should contain:
3139        - MatType_PRIVATE type
3140        - PetscInt        size_of_l2gmap
3141        - PetscInt        global_row_indices[size_of_l2gmap]
3142        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3143     */
3144   } else {
3145     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3146     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3147     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3148     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3149     send_buffer_idxs[1] = i;
3150     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3151     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3152     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3153     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3154     for (i=0;i<n_sends;i++) {
3155       ilengths_vals[is_indices[i]] = len*len;
3156       ilengths_idxs[is_indices[i]] = len+2;
3157     }
3158   }
3159   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3160   /* additional is (if any) */
3161   if (nis) {
3162     PetscMPIInt psum;
3163     PetscInt j;
3164     for (j=0,psum=0;j<nis;j++) {
3165       PetscInt plen;
3166       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3167       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3168       psum += len+1; /* indices + lenght */
3169     }
3170     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3171     for (j=0,psum=0;j<nis;j++) {
3172       PetscInt plen;
3173       const PetscInt *is_array_idxs;
3174       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3175       send_buffer_idxs_is[psum] = plen;
3176       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3177       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3178       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3179       psum += plen+1; /* indices + lenght */
3180     }
3181     for (i=0;i<n_sends;i++) {
3182       ilengths_idxs_is[is_indices[i]] = psum;
3183     }
3184     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3185   }
3186 
3187   buf_size_idxs = 0;
3188   buf_size_vals = 0;
3189   buf_size_idxs_is = 0;
3190   for (i=0;i<n_recvs;i++) {
3191     buf_size_idxs += (PetscInt)olengths_idxs[i];
3192     buf_size_vals += (PetscInt)olengths_vals[i];
3193     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3194   }
3195   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3196   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3197   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3198 
3199   /* get new tags for clean communications */
3200   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3201   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3202   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3203 
3204   /* allocate for requests */
3205   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3206   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3207   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3208   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3209   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3210   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3211 
3212   /* communications */
3213   ptr_idxs = recv_buffer_idxs;
3214   ptr_vals = recv_buffer_vals;
3215   ptr_idxs_is = recv_buffer_idxs_is;
3216   for (i=0;i<n_recvs;i++) {
3217     source_dest = onodes[i];
3218     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3219     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3220     ptr_idxs += olengths_idxs[i];
3221     ptr_vals += olengths_vals[i];
3222     if (nis) {
3223       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3224       ptr_idxs_is += olengths_idxs_is[i];
3225     }
3226   }
3227   for (i=0;i<n_sends;i++) {
3228     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3229     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3230     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3231     if (nis) {
3232       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3233     }
3234   }
3235   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3236   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3237 
3238   /* assemble new l2g map */
3239   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3240   ptr_idxs = recv_buffer_idxs;
3241   new_local_rows = 0;
3242   for (i=0;i<n_recvs;i++) {
3243     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3244     ptr_idxs += olengths_idxs[i];
3245   }
3246   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3247   ptr_idxs = recv_buffer_idxs;
3248   new_local_rows = 0;
3249   for (i=0;i<n_recvs;i++) {
3250     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3251     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3252     ptr_idxs += olengths_idxs[i];
3253   }
3254   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3255   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3256   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3257 
3258   /* infer new local matrix type from received local matrices type */
3259   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3260   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3261   if (n_recvs) {
3262     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3263     ptr_idxs = recv_buffer_idxs;
3264     for (i=0;i<n_recvs;i++) {
3265       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3266         new_local_type_private = MATAIJ_PRIVATE;
3267         break;
3268       }
3269       ptr_idxs += olengths_idxs[i];
3270     }
3271     switch (new_local_type_private) {
3272       case MATDENSE_PRIVATE:
3273         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3274           new_local_type = MATSEQAIJ;
3275           bs = 1;
3276         } else { /* if I receive only 1 dense matrix */
3277           new_local_type = MATSEQDENSE;
3278           bs = 1;
3279         }
3280         break;
3281       case MATAIJ_PRIVATE:
3282         new_local_type = MATSEQAIJ;
3283         bs = 1;
3284         break;
3285       case MATBAIJ_PRIVATE:
3286         new_local_type = MATSEQBAIJ;
3287         break;
3288       case MATSBAIJ_PRIVATE:
3289         new_local_type = MATSEQSBAIJ;
3290         break;
3291       default:
3292         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3293         break;
3294     }
3295   } else { /* by default, new_local_type is seqdense */
3296     new_local_type = MATSEQDENSE;
3297     bs = 1;
3298   }
3299 
3300   /* create MATIS object if needed */
3301   if (reuse == MAT_INITIAL_MATRIX) {
3302     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3303     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3304   } else {
3305     /* it also destroys the local matrices */
3306     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3307   }
3308   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3309   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3310 
3311   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3312 
3313   /* Global to local map of received indices */
3314   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3315   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3316   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3317 
3318   /* restore attributes -> type of incoming data and its size */
3319   buf_size_idxs = 0;
3320   for (i=0;i<n_recvs;i++) {
3321     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3322     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3323     buf_size_idxs += (PetscInt)olengths_idxs[i];
3324   }
3325   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3326 
3327   /* set preallocation */
3328   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3329   if (!newisdense) {
3330     PetscInt *new_local_nnz=0;
3331 
3332     ptr_vals = recv_buffer_vals;
3333     ptr_idxs = recv_buffer_idxs_local;
3334     if (n_recvs) {
3335       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3336     }
3337     for (i=0;i<n_recvs;i++) {
3338       PetscInt j;
3339       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3340         for (j=0;j<*(ptr_idxs+1);j++) {
3341           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3342         }
3343       } else {
3344         /* TODO */
3345       }
3346       ptr_idxs += olengths_idxs[i];
3347     }
3348     if (new_local_nnz) {
3349       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3350       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3351       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3352       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3353       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3354       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3355     } else {
3356       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3357     }
3358     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3359   } else {
3360     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3361   }
3362 
3363   /* set values */
3364   ptr_vals = recv_buffer_vals;
3365   ptr_idxs = recv_buffer_idxs_local;
3366   for (i=0;i<n_recvs;i++) {
3367     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3368       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3369       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3370       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3371       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3372       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3373     } else {
3374       /* TODO */
3375     }
3376     ptr_idxs += olengths_idxs[i];
3377     ptr_vals += olengths_vals[i];
3378   }
3379   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3380   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3381   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3382   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3383   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3384   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3385 
3386 #if 0
3387   if (!restrict_comm) { /* check */
3388     Vec       lvec,rvec;
3389     PetscReal infty_error;
3390 
3391     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3392     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3393     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3394     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3395     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3396     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3397     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3398     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3399     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3400   }
3401 #endif
3402 
3403   /* assemble new additional is (if any) */
3404   if (nis) {
3405     PetscInt **temp_idxs,*count_is,j,psum;
3406 
3407     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3408     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3409     ptr_idxs = recv_buffer_idxs_is;
3410     psum = 0;
3411     for (i=0;i<n_recvs;i++) {
3412       for (j=0;j<nis;j++) {
3413         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3414         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3415         psum += plen;
3416         ptr_idxs += plen+1; /* shift pointer to received data */
3417       }
3418     }
3419     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3420     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3421     for (i=1;i<nis;i++) {
3422       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3423     }
3424     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3425     ptr_idxs = recv_buffer_idxs_is;
3426     for (i=0;i<n_recvs;i++) {
3427       for (j=0;j<nis;j++) {
3428         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3429         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3430         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3431         ptr_idxs += plen+1; /* shift pointer to received data */
3432       }
3433     }
3434     for (i=0;i<nis;i++) {
3435       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3436       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3437       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3438     }
3439     ierr = PetscFree(count_is);CHKERRQ(ierr);
3440     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3441     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3442   }
3443   /* free workspace */
3444   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3445   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3446   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3447   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3448   if (isdense) {
3449     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3450     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3451   } else {
3452     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3453   }
3454   if (nis) {
3455     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3456     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3457   }
3458   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3459   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3460   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3461   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3462   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3463   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3464   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3465   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3466   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3467   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3468   ierr = PetscFree(onodes);CHKERRQ(ierr);
3469   if (nis) {
3470     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3471     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3472     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3473   }
3474   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3475   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3476     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3477     for (i=0;i<nis;i++) {
3478       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3479     }
3480   }
3481   PetscFunctionReturn(0);
3482 }
3483 
3484 /* temporary hack into ksp private data structure */
3485 #include <petsc-private/kspimpl.h>
3486 
3487 #undef __FUNCT__
3488 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3489 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3490 {
3491   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3492   PC_IS                  *pcis = (PC_IS*)pc->data;
3493   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3494   MatNullSpace           CoarseNullSpace=NULL;
3495   ISLocalToGlobalMapping coarse_islg;
3496   IS                     coarse_is,*isarray;
3497   PetscInt               i,im_active=-1,active_procs=-1;
3498   PetscInt               nis,nisdofs,nisneu;
3499   PC                     pc_temp;
3500   PCType                 coarse_pc_type;
3501   KSPType                coarse_ksp_type;
3502   PetscBool              multilevel_requested,multilevel_allowed;
3503   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3504   Mat                    t_coarse_mat_is;
3505   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3506   PetscMPIInt            all_procs;
3507   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3508   PetscBool              compute_vecs = PETSC_FALSE;
3509   PetscScalar            *array;
3510   PetscErrorCode         ierr;
3511 
3512   PetscFunctionBegin;
3513   /* Assign global numbering to coarse dofs */
3514   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3515     compute_vecs = PETSC_TRUE;
3516     PetscInt ocoarse_size;
3517     ocoarse_size = pcbddc->coarse_size;
3518     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3519     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3520     /* see if we can avoid some work */
3521     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3522       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3523         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3524         coarse_reuse = PETSC_FALSE;
3525       } else { /* we can safely reuse already computed coarse matrix */
3526         coarse_reuse = PETSC_TRUE;
3527       }
3528     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3529       coarse_reuse = PETSC_FALSE;
3530     }
3531     /* reset any subassembling information */
3532     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3533     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3534   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3535     coarse_reuse = PETSC_TRUE;
3536   }
3537 
3538   /* count "active" (i.e. with positive local size) and "void" processes */
3539   im_active = !!(pcis->n);
3540   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3541   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3542   void_procs = all_procs-active_procs;
3543   csin_type_simple = PETSC_TRUE;
3544   redist = PETSC_FALSE;
3545   if (pcbddc->current_level && void_procs) {
3546     csin_ml = PETSC_TRUE;
3547     ncoarse_ml = void_procs;
3548     csin_ds = PETSC_TRUE;
3549     ncoarse_ds = void_procs;
3550   } else {
3551     csin_ml = PETSC_FALSE;
3552     ncoarse_ml = all_procs;
3553     if (void_procs) {
3554       csin_ds = PETSC_TRUE;
3555       ncoarse_ds = void_procs;
3556       csin_type_simple = PETSC_FALSE;
3557     } else {
3558       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3559         csin_ds = PETSC_TRUE;
3560         ncoarse_ds = pcbddc->redistribute_coarse;
3561         redist = PETSC_TRUE;
3562       } else {
3563         csin_ds = PETSC_FALSE;
3564         ncoarse_ds = all_procs;
3565       }
3566     }
3567   }
3568 
3569   /*
3570     test if we can go multilevel: three conditions must be satisfied:
3571     - we have not exceeded the number of levels requested
3572     - we can actually subassemble the active processes
3573     - we can find a suitable number of MPI processes where we can place the subassembled problem
3574   */
3575   multilevel_allowed = PETSC_FALSE;
3576   multilevel_requested = PETSC_FALSE;
3577   if (pcbddc->current_level < pcbddc->max_levels) {
3578     multilevel_requested = PETSC_TRUE;
3579     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3580       multilevel_allowed = PETSC_FALSE;
3581     } else {
3582       multilevel_allowed = PETSC_TRUE;
3583     }
3584   }
3585   /* determine number of process partecipating to coarse solver */
3586   if (multilevel_allowed) {
3587     ncoarse = ncoarse_ml;
3588     csin = csin_ml;
3589   } else {
3590     ncoarse = ncoarse_ds;
3591     csin = csin_ds;
3592   }
3593 
3594   /* creates temporary l2gmap and IS for coarse indexes */
3595   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3596   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3597 
3598   /* creates temporary MATIS object for coarse matrix */
3599   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3600   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3601   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3602   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3603 #if 0
3604   {
3605     PetscViewer viewer;
3606     char filename[256];
3607     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3608     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3609     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3610     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3611     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3612   }
3613 #endif
3614   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3615   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3616   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3619 
3620   /* compute dofs splitting and neumann boundaries for coarse dofs */
3621   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3622     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3623     const PetscInt         *idxs;
3624     ISLocalToGlobalMapping tmap;
3625 
3626     /* create map between primal indices (in local representative ordering) and local primal numbering */
3627     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3628     /* allocate space for temporary storage */
3629     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3630     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3631     /* allocate for IS array */
3632     nisdofs = pcbddc->n_ISForDofsLocal;
3633     nisneu = !!pcbddc->NeumannBoundariesLocal;
3634     nis = nisdofs + nisneu;
3635     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3636     /* dofs splitting */
3637     for (i=0;i<nisdofs;i++) {
3638       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3639       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3640       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3641       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3642       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3643       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3644       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3645       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3646     }
3647     /* neumann boundaries */
3648     if (pcbddc->NeumannBoundariesLocal) {
3649       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3650       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3651       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3652       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3653       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3654       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3655       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3656       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3657     }
3658     /* free memory */
3659     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3660     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3661     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3662   } else {
3663     nis = 0;
3664     nisdofs = 0;
3665     nisneu = 0;
3666     isarray = NULL;
3667   }
3668   /* destroy no longer needed map */
3669   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3670 
3671   /* restrict on coarse candidates (if needed) */
3672   coarse_mat_is = NULL;
3673   if (csin) {
3674     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3675       if (redist) {
3676         PetscMPIInt rank;
3677         PetscInt spc,n_spc_p1,dest[1];
3678 
3679         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3680         spc = all_procs/pcbddc->redistribute_coarse;
3681         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3682         if (rank > n_spc_p1*(spc+1)-1) {
3683           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3684         } else {
3685           dest[0] = rank/(spc+1);
3686         }
3687         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3688       } else {
3689         PetscInt j,tissize,*nisindices;
3690         PetscInt *coarse_candidates;
3691         const PetscInt* tisindices;
3692         /* get coarse candidates' ranks in pc communicator */
3693         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3694         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3695         for (i=0,j=0;i<all_procs;i++) {
3696           if (!coarse_candidates[i]) {
3697             coarse_candidates[j]=i;
3698             j++;
3699           }
3700         }
3701         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3702         /* get a suitable subassembling pattern */
3703         if (csin_type_simple) {
3704           PetscMPIInt rank;
3705           PetscInt    issize,isidx;
3706           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3707           if (im_active) {
3708             issize = 1;
3709             isidx = (PetscInt)rank;
3710           } else {
3711             issize = 0;
3712             isidx = -1;
3713           }
3714           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3715         } else {
3716           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3717         }
3718         if (pcbddc->dbg_flag) {
3719           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3720           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3721           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3722           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3723           for (i=0;i<j;i++) {
3724             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3725           }
3726           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3727           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3728         }
3729         /* shift the pattern on coarse candidates */
3730         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3731         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3732         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3733         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3734         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3735         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3736         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3737       }
3738     }
3739     if (pcbddc->dbg_flag) {
3740       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3741       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3742       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3743       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3744     }
3745     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3746     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3747   } else {
3748     if (pcbddc->dbg_flag) {
3749       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3750       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3751       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3752     }
3753     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3754     coarse_mat_is = t_coarse_mat_is;
3755   }
3756 
3757   /* create local to global scatters for coarse problem */
3758   if (compute_vecs) {
3759     PetscInt lrows;
3760     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3761     if (coarse_mat_is) {
3762       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3763     } else {
3764       lrows = 0;
3765     }
3766     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3767     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3768     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3769     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3770     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3771   }
3772   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3773   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3774 
3775   /* set defaults for coarse KSP and PC */
3776   if (multilevel_allowed) {
3777     coarse_ksp_type = KSPRICHARDSON;
3778     coarse_pc_type = PCBDDC;
3779   } else {
3780     coarse_ksp_type = KSPPREONLY;
3781     coarse_pc_type = PCREDUNDANT;
3782   }
3783 
3784   /* print some info if requested */
3785   if (pcbddc->dbg_flag) {
3786     if (!multilevel_allowed) {
3787       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3788       if (multilevel_requested) {
3789         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3790       } else if (pcbddc->max_levels) {
3791         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3792       }
3793       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3794     }
3795   }
3796 
3797   /* create the coarse KSP object only once with defaults */
3798   if (coarse_mat_is) {
3799     MatReuse coarse_mat_reuse;
3800     PetscViewer dbg_viewer = NULL;
3801     if (pcbddc->dbg_flag) {
3802       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3803       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3804     }
3805     if (!pcbddc->coarse_ksp) {
3806       char prefix[256],str_level[16];
3807       size_t len;
3808       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3809       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3810       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3811       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3812       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3813       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3814       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3815       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3816       /* prefix */
3817       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3818       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3819       if (!pcbddc->current_level) {
3820         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3821         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3822       } else {
3823         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3824         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3825         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3826         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3827         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3828         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3829       }
3830       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3831       /* allow user customization */
3832       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3833       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3834     }
3835 
3836     /* get some info after set from options */
3837     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3838     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3839     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3840     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3841     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3842       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3843       isbddc = PETSC_FALSE;
3844     }
3845     if (isredundant) {
3846       KSP inner_ksp;
3847       PC inner_pc;
3848       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3849       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3850       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3851     }
3852 
3853     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3854     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3855     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3856     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3857     if (nisdofs) {
3858       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3859       for (i=0;i<nisdofs;i++) {
3860         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3861       }
3862     }
3863     if (nisneu) {
3864       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3865       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3866     }
3867 
3868     /* assemble coarse matrix */
3869     if (coarse_reuse) {
3870       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3871       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3872       coarse_mat_reuse = MAT_REUSE_MATRIX;
3873     } else {
3874       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3875     }
3876     if (isbddc || isnn) {
3877       if (pcbddc->coarsening_ratio > 1) {
3878         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3879           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3880           if (pcbddc->dbg_flag) {
3881             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3882             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3883             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3884             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3885           }
3886         }
3887         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3888       } else {
3889         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3890         coarse_mat = coarse_mat_is;
3891       }
3892     } else {
3893       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3894     }
3895     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3896 
3897     /* propagate symmetry info to coarse matrix */
3898     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3899     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3900 
3901     /* set operators */
3902     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3903     if (pcbddc->dbg_flag) {
3904       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3905     }
3906   } else { /* processes non partecipating to coarse solver (if any) */
3907     coarse_mat = 0;
3908   }
3909   ierr = PetscFree(isarray);CHKERRQ(ierr);
3910 #if 0
3911   {
3912     PetscViewer viewer;
3913     char filename[256];
3914     sprintf(filename,"coarse_mat.m");
3915     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3916     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3917     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3918     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3919   }
3920 #endif
3921 
3922   /* Compute coarse null space (special handling by BDDC only) */
3923   if (pcbddc->NullSpace) {
3924     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3925   }
3926 
3927   if (pcbddc->coarse_ksp) {
3928     Vec crhs,csol;
3929     PetscBool ispreonly;
3930     if (CoarseNullSpace) {
3931       if (isbddc) {
3932         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3933       } else {
3934         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3935       }
3936     }
3937     /* setup coarse ksp */
3938     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3939     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3940     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3941     /* hack */
3942     if (!csol) {
3943       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3944     }
3945     if (!crhs) {
3946       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3947     }
3948     /* Check coarse problem if in debug mode or if solving with an iterative method */
3949     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3950     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3951       KSP       check_ksp;
3952       KSPType   check_ksp_type;
3953       PC        check_pc;
3954       Vec       check_vec,coarse_vec;
3955       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3956       PetscInt  its;
3957       PetscBool compute_eigs;
3958       PetscReal *eigs_r,*eigs_c;
3959       PetscInt  neigs;
3960       const char *prefix;
3961 
3962       /* Create ksp object suitable for estimation of extreme eigenvalues */
3963       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3964       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3965       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3966       if (ispreonly) {
3967         check_ksp_type = KSPPREONLY;
3968         compute_eigs = PETSC_FALSE;
3969       } else {
3970         check_ksp_type = KSPGMRES;
3971         compute_eigs = PETSC_TRUE;
3972       }
3973       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3974       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3975       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3976       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3977       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3978       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3979       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3980       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3981       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3982       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3983       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3984       /* create random vec */
3985       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3986       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3987       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3988       if (CoarseNullSpace) {
3989         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3990       }
3991       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3992       /* solve coarse problem */
3993       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3994       if (CoarseNullSpace) {
3995         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3996       }
3997       /* set eigenvalue estimation if preonly has not been requested */
3998       if (compute_eigs) {
3999         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4000         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4001         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4002         lambda_max = eigs_r[neigs-1];
4003         lambda_min = eigs_r[0];
4004         if (pcbddc->use_coarse_estimates) {
4005           if (lambda_max>lambda_min) {
4006             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4007             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4008           }
4009         }
4010       }
4011 
4012       /* check coarse problem residual error */
4013       if (pcbddc->dbg_flag) {
4014         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4015         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4016         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4017         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4018         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4019         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4020         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4021         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4022         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4023         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4024         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4025         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4026         if (compute_eigs) {
4027           PetscReal lambda_max_s,lambda_min_s;
4028           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4029           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4030           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4031           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4032           for (i=0;i<neigs;i++) {
4033             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4034           }
4035         }
4036         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4037         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4038       }
4039       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4040       if (compute_eigs) {
4041         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4042         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4043       }
4044     }
4045   }
4046   /* print additional info */
4047   if (pcbddc->dbg_flag) {
4048     /* waits until all processes reaches this point */
4049     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4050     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4051     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4052   }
4053 
4054   /* free memory */
4055   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4056   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4057   PetscFunctionReturn(0);
4058 }
4059 
4060 #undef __FUNCT__
4061 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4062 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4063 {
4064   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4065   PC_IS*         pcis = (PC_IS*)pc->data;
4066   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4067   PetscInt       i,coarse_size;
4068   PetscInt       *local_primal_indices;
4069   PetscErrorCode ierr;
4070 
4071   PetscFunctionBegin;
4072   /* Compute global number of coarse dofs */
4073   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4074     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4075   }
4076   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4077 
4078   /* check numbering */
4079   if (pcbddc->dbg_flag) {
4080     PetscScalar coarsesum,*array;
4081     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4082 
4083     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4084     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4085     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4086     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4087     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4088     for (i=0;i<pcbddc->local_primal_size;i++) {
4089       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4090     }
4091     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4092     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4093     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4094     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4095     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4096     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4097     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4098     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4099     for (i=0;i<pcis->n;i++) {
4100       if (array[i] == 1.0) {
4101         set_error = PETSC_TRUE;
4102         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4103       }
4104     }
4105     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4106     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4107     for (i=0;i<pcis->n;i++) {
4108       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4109     }
4110     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4111     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4112     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4113     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4114     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4115     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4116     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4117       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4118       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4119       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4120       for (i=0;i<pcbddc->local_primal_size;i++) {
4121         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4122       }
4123       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4124     }
4125     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4126     if (set_error_reduced) {
4127       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4128     }
4129   }
4130   /* get back data */
4131   *coarse_size_n = coarse_size;
4132   *local_primal_indices_n = local_primal_indices;
4133   PetscFunctionReturn(0);
4134 }
4135 
4136 #undef __FUNCT__
4137 #define __FUNCT__ "PCBDDCGlobalToLocal"
4138 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4139 {
4140   IS             localis_t;
4141   PetscInt       i,lsize,*idxs,n;
4142   PetscScalar    *vals;
4143   PetscErrorCode ierr;
4144 
4145   PetscFunctionBegin;
4146   /* get indices in local ordering exploiting local to global map */
4147   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4148   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4149   for (i=0;i<lsize;i++) vals[i] = 1.0;
4150   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4151   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4152   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4153   if (idxs) { /* multilevel guard */
4154     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4155   }
4156   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4157   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4158   ierr = PetscFree(vals);CHKERRQ(ierr);
4159   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4160   /* now compute set in local ordering */
4161   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4162   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4163   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4164   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4165   for (i=0,lsize=0;i<n;i++) {
4166     if (PetscRealPart(vals[i]) > 0.5) {
4167       lsize++;
4168     }
4169   }
4170   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4171   for (i=0,lsize=0;i<n;i++) {
4172     if (PetscRealPart(vals[i]) > 0.5) {
4173       idxs[lsize++] = i;
4174     }
4175   }
4176   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4177   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4178   *localis = localis_t;
4179   PetscFunctionReturn(0);
4180 }
4181 
4182 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4183 #undef __FUNCT__
4184 #define __FUNCT__ "PCBDDCMatMult_Private"
4185 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4186 {
4187   PCBDDCChange_ctx change_ctx;
4188   PetscErrorCode   ierr;
4189 
4190   PetscFunctionBegin;
4191   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4192   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4193   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4194   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4195   PetscFunctionReturn(0);
4196 }
4197 
4198 #undef __FUNCT__
4199 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4200 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4201 {
4202   PCBDDCChange_ctx change_ctx;
4203   PetscErrorCode   ierr;
4204 
4205   PetscFunctionBegin;
4206   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4207   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4208   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4209   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4210   PetscFunctionReturn(0);
4211 }
4212