xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 5ff630250620d1ec1a1edb70acde3521db631784)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Compute matrix after change of basis and extract local submatrices */
18   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D */
21   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
22   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
23 
24   /* Setup local solvers ksp_D and ksp_R */
25   /* PCBDDCSetUpLocalScatters should be called first! */
26   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
27 
28   /* Change global null space passed in by the user if change of basis has been requested */
29   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
30     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
31   }
32 
33   /*
34      Setup local correction and local part of coarse basis.
35      Gives back the dense local part of the coarse matrix in column major ordering
36   */
37   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* Compute total number of coarse nodes and setup coarse solver */
40   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
41 
42   /* free */
43   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 #undef __FUNCT__
48 #define __FUNCT__ "PCBDDCResetCustomization"
49 PetscErrorCode PCBDDCResetCustomization(PC pc)
50 {
51   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
61   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
62   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
63   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
64   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
65   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
79   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
80   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
81   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "PCBDDCResetSolvers"
87 PetscErrorCode PCBDDCResetSolvers(PC pc)
88 {
89   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
101   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
104   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
105   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
108   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
109   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
110   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
111   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
112   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
113   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
114   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
115   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
116   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
122 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
123 {
124   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
125   PC_IS          *pcis = (PC_IS*)pc->data;
126   VecType        impVecType;
127   PetscInt       n_constraints,n_R,old_size;
128   PetscErrorCode ierr;
129 
130   PetscFunctionBegin;
131   if (!pcbddc->ConstraintMatrix) {
132     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
133   }
134   /* get sizes */
135   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
136   n_R = pcis->n-pcbddc->n_actual_vertices;
137   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
138   /* local work vectors (try to avoid unneeded work)*/
139   /* R nodes */
140   old_size = -1;
141   if (pcbddc->vec1_R) {
142     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
143   }
144   if (n_R != old_size) {
145     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
146     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
147     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
148     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
149     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
150     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
151   }
152   /* local primal dofs */
153   old_size = -1;
154   if (pcbddc->vec1_P) {
155     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
156   }
157   if (pcbddc->local_primal_size != old_size) {
158     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
159     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
160     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
161     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
162   }
163   /* local explicit constraints */
164   old_size = -1;
165   if (pcbddc->vec1_C) {
166     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
167   }
168   if (n_constraints && n_constraints != old_size) {
169     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
170     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
171     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
172     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
173   }
174   PetscFunctionReturn(0);
175 }
176 
177 #undef __FUNCT__
178 #define __FUNCT__ "PCBDDCSetUpCorrection"
179 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
180 {
181   PetscErrorCode         ierr;
182   /* pointers to pcis and pcbddc */
183   PC_IS*                 pcis = (PC_IS*)pc->data;
184   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
185   /* submatrices of local problem */
186   Mat                    A_RV,A_VR,A_VV;
187   /* working matrices */
188   Mat                    M1,M2,M3,C_CR;
189   /* working vectors */
190   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
191   /* additional working stuff */
192   IS                     is_aux;
193   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
194   const PetscScalar      *array,*row_cmat_values;
195   const PetscInt         *row_cmat_indices,*idx_R_local;
196   PetscInt               *idx_V_B,*auxindices;
197   PetscInt               n_vertices,n_constraints,size_of_constraint;
198   PetscInt               i,j,n_R,n_D,n_B;
199   PetscBool              unsymmetric_check;
200   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
201   MatType                impMatType;
202   /* some shortcuts to scalars */
203   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
204   /* for debugging purposes */
205   PetscReal              *coarsefunctions_errors,*constraints_errors;
206 
207   PetscFunctionBegin;
208   /* get number of vertices (corners plus constraints with change of basis)
209      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
210   n_vertices = pcbddc->n_actual_vertices;
211   n_constraints = pcbddc->local_primal_size-n_vertices;
212   /* Set Non-overlapping dimensions */
213   n_B = pcis->n_B; n_D = pcis->n - n_B;
214   n_R = pcis->n-n_vertices;
215 
216   /* Set types for local objects needed by BDDC precondtioner */
217   impMatType = MATSEQDENSE;
218 
219   /* Allocating some extra storage just to be safe */
220   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
221   for (i=0;i<pcis->n;i++) auxindices[i]=i;
222 
223   /* vertices in boundary numbering */
224   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
225   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
226   if (i != n_vertices) {
227     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
228   }
229 
230   /* Precompute stuffs needed for preprocessing and application of BDDC*/
231   if (n_constraints) {
232     /* see if we can save some allocations */
233     if (pcbddc->local_auxmat2) {
234       PetscInt on_R,on_constraints;
235       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
236       if (on_R != n_R || on_constraints != n_constraints) {
237         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
238         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
239       }
240     }
241     /* work vectors */
242     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
243     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
244     /* auxiliary matrices */
245     if (!pcbddc->local_auxmat2) {
246       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
247       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
248       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
249       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
250     }
251 
252     /* Extract constraints on R nodes: C_{CR}  */
253     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
254     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
255     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
256 
257     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
258     for (i=0;i<n_constraints;i++) {
259       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
260       /* Get row of constraint matrix in R numbering */
261       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
262       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
263       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
264       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
265       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
266       /* Solve for row of constraint matrix in R numbering */
267       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
268       /* Set values in local_auxmat2 */
269       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
270       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
271       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
272     }
273     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
275     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
276 
277     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
278     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
279     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
280     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
281     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
282     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
283     ierr = MatSetUp(M1);CHKERRQ(ierr);
284     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
285     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
286     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
287     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
288     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
289     ierr = MatDestroy(&M2);CHKERRQ(ierr);
290     ierr = MatDestroy(&M3);CHKERRQ(ierr);
291     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
292     if (!pcbddc->local_auxmat1) {
293       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
294     } else {
295       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
296     }
297   }
298 
299   /* Get submatrices from subdomain matrix */
300   if (n_vertices) {
301     PetscInt ibs,mbs;
302     PetscBool issbaij;
303     Mat newmat;
304 
305     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
306     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
307     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
308     if (ibs != mbs) { /* need to convert to SEQAIJ */
309       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
311       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
312       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
313       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
314     } else {
315       /* this is safe */
316       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
317       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
318       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
319         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
320         /* which of the two approaches is faster? */
321         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
322         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
323         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
325         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
326       } else {
327         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
328         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
329       }
330     }
331     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
332     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
333     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
334   }
335 
336   /* Matrix of coarse basis functions (local) */
337   if (pcbddc->coarse_phi_B) {
338     PetscInt on_B,on_primal;
339     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
340     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
341       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
342       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
343     }
344   }
345   if (pcbddc->coarse_phi_D) {
346     PetscInt on_D,on_primal;
347     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
348     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
349       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
350       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
351     }
352   }
353   if (!pcbddc->coarse_phi_B) {
354     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
355     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
356     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
357     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
358   }
359   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
360     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
361     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
362     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
363     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
364   }
365 
366   if (pcbddc->dbg_flag) {
367     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
368     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
369     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
370   }
371   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
372   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
373 
374   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
375 
376   /* vertices */
377   for (i=0;i<n_vertices;i++) {
378     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
379     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
380     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
381     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
382     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
383     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
384     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
385     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
386     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
387     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
388     if (n_constraints) {
389       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
390       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
391       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
392     }
393     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
394     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
395 
396     /* Set values in coarse basis function and subdomain part of coarse_mat */
397     /* coarse basis functions */
398     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
399     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
402     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
403     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
404     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
405     if (pcbddc->switch_static || pcbddc->dbg_flag) {
406       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
409       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
410       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
411     }
412     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
413     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
414     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
415     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
416     if (n_constraints) {
417       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
418       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
420     }
421 
422     /* check */
423     if (pcbddc->dbg_flag) {
424       /* assemble subdomain vector on local nodes */
425       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
426       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
427       if (n_R) {
428         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
429       }
430       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
431       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
432       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
433       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
434       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
435       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
436       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
437       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
438       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
439       if (n_constraints) {
440         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
441         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
442         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
443       }
444       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
445       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
446       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
447       /* check saddle point solution */
448       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
449       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
450       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
451       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
452       /* shift by the identity matrix */
453       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
454       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
455       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
456       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
457     }
458   }
459 
460   /* constraints */
461   for (i=0;i<n_constraints;i++) {
462     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
463     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
464     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
465     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
466     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
467     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
468     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
469     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
470     if (n_vertices) {
471       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
472     }
473     /* Set values in coarse basis function and subdomain part of coarse_mat */
474     /* coarse basis functions */
475     j = i+n_vertices; /* don't touch this! */
476     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
477     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
480     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
481     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
482     if (pcbddc->switch_static || pcbddc->dbg_flag) {
483       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
484       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
485       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
486       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
487       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
488     }
489     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
490     if (n_vertices) {
491       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
492       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
493       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
494     }
495     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
496     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
497     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
498 
499     if (pcbddc->dbg_flag) {
500       /* assemble subdomain vector on nodes */
501       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
502       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
503       if (n_R) {
504         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
505       }
506       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
507       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
508       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
509       /* assemble subdomain vector of lagrange multipliers */
510       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
511       if (n_vertices) {
512         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
513         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
514         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
515       }
516       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
517       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
518       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
519       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
522       /* check saddle point solution */
523       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
524       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
525       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
526       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
527       /* shift by the identity matrix */
528       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
529       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
530       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
531       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
532     }
533   }
534   /* call assembling routines for local coarse basis */
535   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
537   if (pcbddc->switch_static || pcbddc->dbg_flag) {
538     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
539     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
540   }
541 
542   /* compute other basis functions for non-symmetric problems */
543   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
544   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
545     PetscBool setsym;
546     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
547     if (!setsym) pcbddc->issym = PETSC_FALSE;
548   }
549 
550   if (!pcbddc->issym) {
551     if (!pcbddc->coarse_psi_B) {
552       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
553       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
554       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
555       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
556     }
557     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
558       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
559       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
560       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
561       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
562     }
563     for (i=0;i<pcbddc->local_primal_size;i++) {
564       if (n_constraints) {
565         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
566         for (j=0;j<n_constraints;j++) {
567           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
568         }
569         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
570         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
571       }
572       if (i<n_vertices) {
573         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
574         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
575         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
576         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
577         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
578         if (n_constraints) {
579           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
580         }
581       } else {
582         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
583       }
584       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
585       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
586       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
587       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
588       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
589       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
590       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
591       if (i<n_vertices) {
592         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
593       }
594       if (pcbddc->switch_static || pcbddc->dbg_flag) {
595         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
596         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
597         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
598         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
599         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
600       }
601 
602       if (pcbddc->dbg_flag) {
603         /* assemble subdomain vector on nodes */
604         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
605         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
606         if (n_R) {
607           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
608         }
609         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
610         if (i<n_vertices) {
611           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
612         }
613         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
614         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
615         /* assemble subdomain vector of lagrange multipliers */
616         for (j=0;j<pcbddc->local_primal_size;j++) {
617           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
618         }
619         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
620         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
621         /* check saddle point solution */
622         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
623         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
624         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
625         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
626         /* shift by the identity matrix */
627         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
628         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
629         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
630         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
631       }
632     }
633     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
635     if (pcbddc->switch_static || pcbddc->dbg_flag) {
636       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
637       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
638     }
639     unsymmetric_check = PETSC_TRUE;
640   } else { /* take references to already computed coarse basis */
641     unsymmetric_check = PETSC_FALSE;
642     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
643     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
644     if (pcbddc->coarse_phi_D) {
645       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
646       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
647     }
648   }
649   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
650   /* Checking coarse_sub_mat and coarse basis functios */
651   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
652   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
653   if (pcbddc->dbg_flag) {
654     Mat         coarse_sub_mat;
655     Mat         AUXMAT,TM1,TM2,TM3,TM4;
656     Mat         coarse_phi_D,coarse_phi_B;
657     Mat         coarse_psi_D,coarse_psi_B;
658     Mat         A_II,A_BB,A_IB,A_BI;
659     MatType     checkmattype=MATSEQAIJ;
660     PetscReal   real_value;
661 
662     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
663     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
664     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
665     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
666     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
667     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
668     if (unsymmetric_check) {
669       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
670       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
671     }
672     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
673 
674     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
675     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
676     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
677     if (unsymmetric_check) {
678       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
679       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
680       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
681       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
682       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
683       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
684       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
685       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
686       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
687       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
688       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
689       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
690     } else {
691       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
692       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
693       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
694       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
695       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
696       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
697       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
698       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
699     }
700     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
701     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
702     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
703     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
704     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
705     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
706     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
707     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
708     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
709     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
710     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
711     for (i=0;i<pcbddc->local_primal_size;i++) {
712       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
715     for (i=0;i<pcbddc->local_primal_size;i++) {
716       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
717     }
718     if (unsymmetric_check) {
719       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
720       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
721         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
722       }
723       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
724       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
725         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
726       }
727     }
728     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
731     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
732     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
733     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
734     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
735     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
736     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
737     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
738     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
739     if (unsymmetric_check) {
740       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
741       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
742     }
743     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
744     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
745     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
746     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
747   }
748   /* free memory */
749   if (n_vertices) {
750     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
751     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
752     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
753     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
754     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
755   }
756   if (n_constraints) {
757     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
758     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
759     ierr = MatDestroy(&M1);CHKERRQ(ierr);
760     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
761   }
762   ierr = PetscFree(auxindices);CHKERRQ(ierr);
763   /* get back data */
764   *coarse_submat_vals_n = coarse_submat_vals;
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
770 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
771 {
772   PC_IS*            pcis = (PC_IS*)(pc->data);
773   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
774   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
775   PetscBool         issbaij,isseqaij;
776   /* manage repeated solves */
777   MatReuse          reuse;
778   PetscErrorCode    ierr;
779 
780   PetscFunctionBegin;
781   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
782     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
783   }
784   /* get mat flags */
785   reuse = MAT_INITIAL_MATRIX;
786   if (pc->setupcalled) {
787     if (pc->flag == SAME_NONZERO_PATTERN) {
788       reuse = MAT_REUSE_MATRIX;
789     } else {
790       reuse = MAT_INITIAL_MATRIX;
791     }
792   }
793   if (reuse == MAT_INITIAL_MATRIX) {
794     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
795     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
796     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
797     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
798     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
799   }
800 
801   /* transform local matrices if needed */
802   if (pcbddc->ChangeOfBasisMatrix) {
803     Mat       *change_mat_all;
804     IS        is_local,is_global;
805     PetscBool sorted;
806     PetscInt  *idxs_perm;
807 
808     ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr);
809     ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
810     ierr = ISDestroy(&is_local);CHKERRQ(ierr);
811     ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
812     if (!sorted) {
813       const PetscInt *idxs;
814       PetscInt *idxs_sorted,i;
815 
816       ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr);
817       ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr);
818       for (i=0;i<pcis->n;i++) {
819         idxs_perm[i] = i;
820       }
821       ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
822       ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr);
823       for (i=0;i<pcis->n;i++) {
824         idxs_sorted[i] = idxs[idxs_perm[i]];
825       }
826       ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
827       ierr = ISDestroy(&is_global);CHKERRQ(ierr);
828       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
829     }
830 
831     /* get change of basis on the whole set of local dofs */
832     ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
833 
834     if (!sorted) {
835       Mat      new_mat;
836       IS       is_perm;
837       PetscInt *idxs,i;
838 
839       ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr);
840       for (i=0;i<pcis->n;i++) {
841         idxs[idxs_perm[i]] = i;
842       }
843       ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
844       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
845       ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
846       ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
847       ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
848       change_mat_all[0] = new_mat;
849       ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
850     }
851 
852     /* check */
853     if (pcbddc->dbg_flag) {
854       Vec       x,x_change;
855       PetscReal error;
856 
857       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
858       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
859       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
860       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
861       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863       ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
864       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
865       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
866       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
867       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
868       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
869       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
870       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
871       ierr = VecDestroy(&x);CHKERRQ(ierr);
872       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
873     }
874 
875     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
876     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
877     if (isseqaij) {
878       ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
879     } else {
880       Mat work_mat;
881       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
882       ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
883       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
884     }
885     /*
886     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
887     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
888     */
889     ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
890     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
891   } else {
892     /* without change of basis, the local matrix is unchanged */
893     if (!pcbddc->local_mat) {
894       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
895       pcbddc->local_mat = matis->A;
896     }
897   }
898 
899   /* get submatrices */
900   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
901   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
902   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
903   if (!issbaij) {
904     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
905     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
906   } else {
907     Mat newmat;
908     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
909     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
910     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
911     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
912   }
913   PetscFunctionReturn(0);
914 }
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
918 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
919 {
920   PC_IS*         pcis = (PC_IS*)(pc->data);
921   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
922   IS             is_aux1,is_aux2;
923   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
924   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
925   PetscInt       vbs,bs;
926   PetscBT        bitmask;
927   PetscErrorCode ierr;
928 
929   PetscFunctionBegin;
930   /*
931     No need to setup local scatters if
932       - primal space is unchanged
933         AND
934       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
935         AND
936       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
937   */
938   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
939     PetscFunctionReturn(0);
940   }
941   /* destroy old objects */
942   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
943   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
944   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
945   /* Set Non-overlapping dimensions */
946   n_B = pcis->n_B; n_D = pcis->n - n_B;
947   n_vertices = pcbddc->n_actual_vertices;
948   /* create auxiliary bitmask */
949   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
950   for (i=0;i<n_vertices;i++) {
951     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
952   }
953 
954   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
955   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
956   for (i=0, n_R=0; i<pcis->n; i++) {
957     if (!PetscBTLookup(bitmask,i)) {
958       idx_R_local[n_R] = i;
959       n_R++;
960     }
961   }
962 
963   /* Block code */
964   vbs = 1;
965   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
966   if (bs>1 && !(n_vertices%bs)) {
967     PetscBool is_blocked = PETSC_TRUE;
968     PetscInt  *vary;
969     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
970     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
971     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
972     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
973     for (i=0; i<n_vertices; i++) {
974       if (vary[i]!=0 && vary[i]!=bs) {
975         is_blocked = PETSC_FALSE;
976         break;
977       }
978     }
979     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
980       vbs = bs;
981       for (i=0;i<n_R/vbs;i++) {
982         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
983       }
984     }
985     ierr = PetscFree(vary);CHKERRQ(ierr);
986   }
987   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
988   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
989 
990   /* print some info if requested */
991   if (pcbddc->dbg_flag) {
992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
993     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
994     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
995     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
996     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
998     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
999     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1000   }
1001 
1002   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1003   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1004   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1005   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1006   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1007   for (i=0; i<n_D; i++) {
1008     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1009   }
1010   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1011   for (i=0, j=0; i<n_R; i++) {
1012     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1013       aux_array1[j++] = i;
1014     }
1015   }
1016   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1017   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1018   for (i=0, j=0; i<n_B; i++) {
1019     if (!PetscBTLookup(bitmask,is_indices[i])) {
1020       aux_array2[j++] = i;
1021     }
1022   }
1023   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1024   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1025   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1026   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1027   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1028 
1029   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1030     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1031     for (i=0, j=0; i<n_R; i++) {
1032       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1033         aux_array1[j++] = i;
1034       }
1035     }
1036     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1037     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1038     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1039   }
1040   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1041   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1048 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1049 {
1050   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1051   PC_IS          *pcis = (PC_IS*)pc->data;
1052   PC             pc_temp;
1053   Mat            A_RR;
1054   MatReuse       reuse;
1055   PetscScalar    m_one = -1.0;
1056   PetscReal      value;
1057   PetscInt       n_D,n_R,ibs,mbs;
1058   PetscBool      use_exact,use_exact_reduced,issbaij;
1059   PetscErrorCode ierr;
1060   /* prefixes stuff */
1061   char           dir_prefix[256],neu_prefix[256],str_level[16];
1062   size_t         len;
1063 
1064   PetscFunctionBegin;
1065 
1066   /* compute prefixes */
1067   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1068   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1069   if (!pcbddc->current_level) {
1070     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1071     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1072     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1073     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1074   } else {
1075     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1076     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1077     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1078     len -= 15; /* remove "pc_bddc_coarse_" */
1079     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1080     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1081     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1082     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1083     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1084     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1085     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1086     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1087   }
1088 
1089   /* DIRICHLET PROBLEM */
1090   /* Matrix for Dirichlet problem is pcis->A_II */
1091   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1092   if (!pcbddc->ksp_D) { /* create object if not yet build */
1093     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1094     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1095     /* default */
1096     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1097     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1098     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1099     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1100     if (issbaij) {
1101       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1102     } else {
1103       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1104     }
1105     /* Allow user's customization */
1106     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1107     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1108   }
1109   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1110   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1111   if (!n_D) {
1112     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1113     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1114   }
1115   /* Set Up KSP for Dirichlet problem of BDDC */
1116   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1117   /* set ksp_D into pcis data */
1118   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1119   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1120   pcis->ksp_D = pcbddc->ksp_D;
1121 
1122   /* NEUMANN PROBLEM */
1123   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1124   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1125   if (pcbddc->ksp_R) { /* already created ksp */
1126     PetscInt nn_R;
1127     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1128     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1129     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1130     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1131       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1132       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1133       reuse = MAT_INITIAL_MATRIX;
1134     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1135       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1136         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1137         reuse = MAT_INITIAL_MATRIX;
1138       } else { /* safe to reuse the matrix */
1139         reuse = MAT_REUSE_MATRIX;
1140       }
1141     }
1142     /* last check */
1143     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1144       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1145       reuse = MAT_INITIAL_MATRIX;
1146     }
1147   } else { /* first time, so we need to create the matrix */
1148     reuse = MAT_INITIAL_MATRIX;
1149   }
1150   /* extract A_RR */
1151   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1152   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1153   if (ibs != mbs) {
1154     Mat newmat;
1155     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1156     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1157     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1158   } else {
1159     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1160   }
1161   if (!pcbddc->ksp_R) { /* create object if not present */
1162     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1163     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1164     /* default */
1165     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1166     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1167     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1168     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1169     if (issbaij) {
1170       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1171     } else {
1172       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1173     }
1174     /* Allow user's customization */
1175     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1176     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1177   }
1178   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1179   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1180   if (!n_R) {
1181     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1182     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1183   }
1184   /* Set Up KSP for Neumann problem of BDDC */
1185   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1186 
1187   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1188   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1189     /* Dirichlet */
1190     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1191     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1192     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1193     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1194     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1195     /* need to be adapted? */
1196     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1197     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1198     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1199     /* print info */
1200     if (pcbddc->dbg_flag) {
1201       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1202       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1203       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1204       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1205       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1206       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1207     }
1208     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1209       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1210     }
1211 
1212     /* Neumann */
1213     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1214     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1215     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1216     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1217     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1218     /* need to be adapted? */
1219     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1220     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1221     /* print info */
1222     if (pcbddc->dbg_flag) {
1223       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1224       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1225     }
1226     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1227       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1228     }
1229   }
1230   /* free Neumann problem's matrix */
1231   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1237 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1238 {
1239   PetscErrorCode ierr;
1240   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1241 
1242   PetscFunctionBegin;
1243   if (applytranspose) {
1244     if (pcbddc->local_auxmat1) {
1245       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1246       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1247     }
1248     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1249   } else {
1250     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1251     if (pcbddc->local_auxmat1) {
1252       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1253       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1254     }
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1260 #undef __FUNCT__
1261 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1262 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1263 {
1264   PetscErrorCode ierr;
1265   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1266   PC_IS*            pcis = (PC_IS*)  (pc->data);
1267   const PetscScalar zero = 0.0;
1268 
1269   PetscFunctionBegin;
1270   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1271   if (applytranspose) {
1272     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1273     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1274   } else {
1275     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1276     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1277   }
1278   /* start communications from local primal nodes to rhs of coarse solver */
1279   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1280   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282 
1283   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1284   /* TODO remove null space when doing multilevel */
1285   if (pcbddc->coarse_ksp) {
1286     if (applytranspose) {
1287       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1288     } else {
1289       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1290     }
1291   }
1292 
1293   /* Local solution on R nodes */
1294   if (pcis->n) {
1295     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1296     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1297     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1298     if (pcbddc->switch_static) {
1299       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1300       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301     }
1302     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1303     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1304     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306     if (pcbddc->switch_static) {
1307       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1308       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1309     }
1310   }
1311 
1312   /* communications from coarse sol to local primal nodes */
1313   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1314   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1315 
1316   /* Sum contributions from two levels */
1317   if (applytranspose) {
1318     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1319     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1320   } else {
1321     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1322     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1323   }
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1328 #undef __FUNCT__
1329 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1330 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1331 {
1332   PetscErrorCode ierr;
1333   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1334   PetscScalar    *array,*array2;
1335   Vec            from,to;
1336 
1337   PetscFunctionBegin;
1338   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1339     from = pcbddc->coarse_vec;
1340     to = pcbddc->vec1_P;
1341     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1342       Vec tvec;
1343       PetscInt lsize;
1344       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1345       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1346       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1347       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1348       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1349       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1350       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1351     }
1352   } else { /* from local to global -> put data in coarse right hand side */
1353     from = pcbddc->vec1_P;
1354     to = pcbddc->coarse_vec;
1355   }
1356   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1362 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1363 {
1364   PetscErrorCode ierr;
1365   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1366   PetscScalar    *array,*array2;
1367   Vec            from,to;
1368 
1369   PetscFunctionBegin;
1370   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1371     from = pcbddc->coarse_vec;
1372     to = pcbddc->vec1_P;
1373   } else { /* from local to global -> put data in coarse right hand side */
1374     from = pcbddc->vec1_P;
1375     to = pcbddc->coarse_vec;
1376   }
1377   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1378   if (smode == SCATTER_FORWARD) {
1379     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1380       Vec tvec;
1381       PetscInt lsize;
1382       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1383       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1384       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1385       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1386       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1387       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1388       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1389     }
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 /* uncomment for testing purposes */
1395 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1398 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1399 {
1400   PetscErrorCode    ierr;
1401   PC_IS*            pcis = (PC_IS*)(pc->data);
1402   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1403   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1404   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1405   MatType           impMatType=MATSEQAIJ;
1406   /* one and zero */
1407   PetscScalar       one=1.0,zero=0.0;
1408   /* space to store constraints and their local indices */
1409   PetscScalar       *temp_quadrature_constraint;
1410   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1411   /* iterators */
1412   PetscInt          i,j,k,total_counts,temp_start_ptr;
1413   /* stuff to store connected components stored in pcbddc->mat_graph */
1414   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1415   PetscInt          n_ISForFaces,n_ISForEdges;
1416   /* near null space stuff */
1417   MatNullSpace      nearnullsp;
1418   const Vec         *nearnullvecs;
1419   Vec               *localnearnullsp;
1420   PetscBool         nnsp_has_cnst;
1421   PetscInt          nnsp_size;
1422   PetscScalar       *array;
1423   /* BLAS integers */
1424   PetscBLASInt      lwork,lierr;
1425   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1426   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1427   /* LAPACK working arrays for SVD or POD */
1428   PetscBool         skip_lapack;
1429   PetscScalar       *work;
1430   PetscReal         *singular_vals;
1431 #if defined(PETSC_USE_COMPLEX)
1432   PetscReal         *rwork;
1433 #endif
1434 #if defined(PETSC_MISSING_LAPACK_GESVD)
1435   PetscBLASInt      Blas_one_2=1;
1436   PetscScalar       *temp_basis,*correlation_mat;
1437 #else
1438   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1439   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1440 #endif
1441   /* reuse */
1442   PetscInt          olocal_primal_size;
1443   PetscInt          *oprimal_indices_local_idxs;
1444   /* change of basis */
1445   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1446   PetscBool         boolforchange,qr_needed;
1447   PetscBT           touched,change_basis,qr_needed_idx;
1448   /* auxiliary stuff */
1449   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1450   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1451   PetscScalar       *temp_quadrature_constraint_work;
1452   /* some quantities */
1453   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1454   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1455 
1456 
1457   PetscFunctionBegin;
1458   /* Destroy Mat objects computed previously */
1459   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1460   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1461   /* Get index sets for faces, edges and vertices from graph */
1462   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1463   /* free unneeded index sets */
1464   if (!pcbddc->use_vertices) {
1465     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1466   }
1467   if (!pcbddc->use_edges) {
1468     for (i=0;i<n_ISForEdges;i++) {
1469       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1470     }
1471     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1472     n_ISForEdges = 0;
1473   }
1474   if (!pcbddc->use_faces) {
1475     for (i=0;i<n_ISForFaces;i++) {
1476       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1477     }
1478     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1479     n_ISForFaces = 0;
1480   }
1481   /* HACKS (the following two blocks of code) */
1482   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1483     pcbddc->use_change_of_basis = PETSC_TRUE;
1484     if (!ISForEdges) {
1485       pcbddc->use_change_on_faces = PETSC_TRUE;
1486     }
1487   }
1488   if (pcbddc->NullSpace) {
1489     /* use_change_of_basis should be consistent among processors */
1490     PetscBool tbool[2],gbool[2];
1491     tbool [0] = pcbddc->use_change_of_basis;
1492     tbool [1] = pcbddc->use_change_on_faces;
1493     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1494     pcbddc->use_change_of_basis = gbool[0];
1495     pcbddc->use_change_on_faces = gbool[1];
1496   }
1497   /* print some info */
1498   if (pcbddc->dbg_flag) {
1499     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1500     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1501     i = 0;
1502     if (ISForVertices) {
1503       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1504     }
1505     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1506     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1507     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1508     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1509   }
1510   /* check if near null space is attached to global mat */
1511   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1512   if (nearnullsp) {
1513     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1514     /* remove any stored info */
1515     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1516     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1517     /* store information for BDDC solver reuse */
1518     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1519     pcbddc->onearnullspace = nearnullsp;
1520     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1521     for (i=0;i<nnsp_size;i++) {
1522       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1523     }
1524   } else { /* if near null space is not provided BDDC uses constants by default */
1525     nnsp_size = 0;
1526     nnsp_has_cnst = PETSC_TRUE;
1527   }
1528   /* get max number of constraints on a single cc */
1529   max_constraints = nnsp_size;
1530   if (nnsp_has_cnst) max_constraints++;
1531 
1532   /*
1533        Evaluate maximum storage size needed by the procedure
1534        - temp_indices will contain start index of each constraint stored as follows
1535        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1536        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1537        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1538                                                                                                                                                          */
1539   total_counts = n_ISForFaces+n_ISForEdges;
1540   total_counts *= max_constraints;
1541   n_vertices = 0;
1542   if (ISForVertices) {
1543     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1544   }
1545   total_counts += n_vertices;
1546   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1547   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1548   total_counts = 0;
1549   max_size_of_constraint = 0;
1550   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1551     if (i<n_ISForEdges) {
1552       used_IS = &ISForEdges[i];
1553     } else {
1554       used_IS = &ISForFaces[i-n_ISForEdges];
1555     }
1556     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1557     total_counts += j;
1558     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1559   }
1560   total_counts *= max_constraints;
1561   total_counts += n_vertices;
1562   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1563   /* get local part of global near null space vectors */
1564   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1565   for (k=0;k<nnsp_size;k++) {
1566     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1567     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569   }
1570 
1571   /* whether or not to skip lapack calls */
1572   skip_lapack = PETSC_TRUE;
1573   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1574 
1575   /* allocate some auxiliary stuff */
1576   if (!skip_lapack || pcbddc->use_qr_single) {
1577     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1578   }
1579 
1580   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1581   if (!skip_lapack) {
1582     PetscScalar temp_work;
1583 
1584 #if defined(PETSC_MISSING_LAPACK_GESVD)
1585     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1586     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1587     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1588     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1589 #if defined(PETSC_USE_COMPLEX)
1590     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1591 #endif
1592     /* now we evaluate the optimal workspace using query with lwork=-1 */
1593     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1594     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1595     lwork = -1;
1596     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1597 #if !defined(PETSC_USE_COMPLEX)
1598     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1599 #else
1600     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1601 #endif
1602     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1603     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1604 #else /* on missing GESVD */
1605     /* SVD */
1606     PetscInt max_n,min_n;
1607     max_n = max_size_of_constraint;
1608     min_n = max_constraints;
1609     if (max_size_of_constraint < max_constraints) {
1610       min_n = max_size_of_constraint;
1611       max_n = max_constraints;
1612     }
1613     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1614 #if defined(PETSC_USE_COMPLEX)
1615     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1616 #endif
1617     /* now we evaluate the optimal workspace using query with lwork=-1 */
1618     lwork = -1;
1619     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1620     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1621     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1622     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1623 #if !defined(PETSC_USE_COMPLEX)
1624     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1625 #else
1626     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1627 #endif
1628     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1629     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1630 #endif /* on missing GESVD */
1631     /* Allocate optimal workspace */
1632     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1633     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1634   }
1635   /* Now we can loop on constraining sets */
1636   total_counts = 0;
1637   temp_indices[0] = 0;
1638   /* vertices */
1639   if (ISForVertices) {
1640     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1641     if (nnsp_has_cnst) { /* consider all vertices */
1642       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1643       for (i=0;i<n_vertices;i++) {
1644         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1645         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1646         total_counts++;
1647       }
1648     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1649       PetscBool used_vertex;
1650       for (i=0;i<n_vertices;i++) {
1651         used_vertex = PETSC_FALSE;
1652         k = 0;
1653         while (!used_vertex && k<nnsp_size) {
1654           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1655           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1656             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1657             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1658             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1659             total_counts++;
1660             used_vertex = PETSC_TRUE;
1661           }
1662           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1663           k++;
1664         }
1665       }
1666     }
1667     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1668     n_vertices = total_counts;
1669   }
1670 
1671   /* edges and faces */
1672   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1673     if (ncc<n_ISForEdges) {
1674       used_IS = &ISForEdges[ncc];
1675       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1676     } else {
1677       used_IS = &ISForFaces[ncc-n_ISForEdges];
1678       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1679     }
1680     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1681     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1682     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1683     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1684     /* change of basis should not be performed on local periodic nodes */
1685     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1686     if (nnsp_has_cnst) {
1687       PetscScalar quad_value;
1688       temp_constraints++;
1689       if (!pcbddc->use_nnsp_true) {
1690         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1691       } else {
1692         quad_value = 1.0;
1693       }
1694       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1695       for (j=0;j<size_of_constraint;j++) {
1696         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1697       }
1698       /* sort by global ordering if using lapack subroutines */
1699       if (!skip_lapack || pcbddc->use_qr_single) {
1700         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1701         for (j=0;j<size_of_constraint;j++) {
1702           permutation[j]=j;
1703         }
1704         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1705         for (j=0;j<size_of_constraint;j++) {
1706           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1707           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1708         }
1709         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1710         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1711       }
1712       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1713       total_counts++;
1714     }
1715     for (k=0;k<nnsp_size;k++) {
1716       PetscReal real_value;
1717       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1718       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1719       for (j=0;j<size_of_constraint;j++) {
1720         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1721       }
1722       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1723       /* check if array is null on the connected component */
1724       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1725       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1726       if (real_value > 0.0) { /* keep indices and values */
1727         /* sort by global ordering if using lapack subroutines */
1728         if (!skip_lapack || pcbddc->use_qr_single) {
1729           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1730           for (j=0;j<size_of_constraint;j++) {
1731             permutation[j]=j;
1732           }
1733           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1734           for (j=0;j<size_of_constraint;j++) {
1735             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1736             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1737           }
1738           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1739           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1740         }
1741         temp_constraints++;
1742         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1743         total_counts++;
1744       }
1745     }
1746     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1747     valid_constraints = temp_constraints;
1748     if (!pcbddc->use_nnsp_true && temp_constraints) {
1749       if (temp_constraints == 1) { /* just normalize the constraint */
1750         PetscScalar norm;
1751         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1752         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1753         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1754         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1755       } else { /* perform SVD */
1756         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1757 
1758 #if defined(PETSC_MISSING_LAPACK_GESVD)
1759         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1760            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1761            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1762               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1763               from that computed using LAPACKgesvd
1764            -> This is due to a different computation of eigenvectors in LAPACKheev
1765            -> The quality of the POD-computed basis will be the same */
1766         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1767         /* Store upper triangular part of correlation matrix */
1768         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1769         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1770         for (j=0;j<temp_constraints;j++) {
1771           for (k=0;k<j+1;k++) {
1772             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1773           }
1774         }
1775         /* compute eigenvalues and eigenvectors of correlation matrix */
1776         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1777         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1778 #if !defined(PETSC_USE_COMPLEX)
1779         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1780 #else
1781         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1782 #endif
1783         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1784         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1785         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1786         j = 0;
1787         while (j < temp_constraints && singular_vals[j] < tol) j++;
1788         total_counts = total_counts-j;
1789         valid_constraints = temp_constraints-j;
1790         /* scale and copy POD basis into used quadrature memory */
1791         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1792         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1793         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1794         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1795         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1796         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1797         if (j<temp_constraints) {
1798           PetscInt ii;
1799           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1800           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1801           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1802           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1803           for (k=0;k<temp_constraints-j;k++) {
1804             for (ii=0;ii<size_of_constraint;ii++) {
1805               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1806             }
1807           }
1808         }
1809 #else  /* on missing GESVD */
1810         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1811         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1812         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1813         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1814 #if !defined(PETSC_USE_COMPLEX)
1815         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1816 #else
1817         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1818 #endif
1819         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1820         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1821         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1822         k = temp_constraints;
1823         if (k > size_of_constraint) k = size_of_constraint;
1824         j = 0;
1825         while (j < k && singular_vals[k-j-1] < tol) j++;
1826         valid_constraints = k-j;
1827         total_counts = total_counts-temp_constraints+valid_constraints;
1828 #endif /* on missing GESVD */
1829       }
1830     }
1831     /* setting change_of_basis flag is safe now */
1832     if (boolforchange) {
1833       for (j=0;j<valid_constraints;j++) {
1834         PetscBTSet(change_basis,total_counts-j-1);
1835       }
1836     }
1837   }
1838   /* free index sets of faces, edges and vertices */
1839   for (i=0;i<n_ISForFaces;i++) {
1840     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1841   }
1842   if (n_ISForFaces) {
1843     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1844   }
1845   for (i=0;i<n_ISForEdges;i++) {
1846     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1847   }
1848   if (n_ISForEdges) {
1849     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1850   }
1851   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1852   /* map temp_indices_to_constraint in boundary numbering */
1853   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1854   if (i != temp_indices[total_counts]) {
1855     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1856   }
1857 
1858   /* free workspace */
1859   if (!skip_lapack || pcbddc->use_qr_single) {
1860     ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1861   }
1862   if (!skip_lapack) {
1863     ierr = PetscFree(work);CHKERRQ(ierr);
1864 #if defined(PETSC_USE_COMPLEX)
1865     ierr = PetscFree(rwork);CHKERRQ(ierr);
1866 #endif
1867     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1868 #if defined(PETSC_MISSING_LAPACK_GESVD)
1869     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1870     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1871 #endif
1872   }
1873   for (k=0;k<nnsp_size;k++) {
1874     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1875   }
1876   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1877 
1878   /* set quantities in pcbddc data structure and store previous primal size */
1879   /* n_vertices defines the number of subdomain corners in the primal space */
1880   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1881   olocal_primal_size = pcbddc->local_primal_size;
1882   pcbddc->local_primal_size = total_counts;
1883   pcbddc->n_vertices = n_vertices;
1884   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1885 
1886   /* Create constraint matrix */
1887   /* The constraint matrix is used to compute the l2g map of primal dofs */
1888   /* so we need to set it up properly either with or without change of basis */
1889   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1890   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1891   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1892   /* array to compute a local numbering of constraints : vertices first then constraints */
1893   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1894   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1895   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1896   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1897   /* auxiliary stuff for basis change */
1898   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1899   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1900 
1901   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1902   total_primal_vertices=0;
1903   for (i=0;i<pcbddc->local_primal_size;i++) {
1904     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1905     if (size_of_constraint == 1) {
1906       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1907       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1908       aux_primal_minloc[total_primal_vertices]=0;
1909       total_primal_vertices++;
1910     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1911       PetscInt min_loc,min_index;
1912       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1913       /* find first untouched local node */
1914       k = 0;
1915       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1916       min_index = global_indices[k];
1917       min_loc = k;
1918       /* search the minimum among global nodes already untouched on the cc */
1919       for (k=1;k<size_of_constraint;k++) {
1920         /* there can be more than one constraint on a single connected component */
1921         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1922           min_index = global_indices[k];
1923           min_loc = k;
1924         }
1925       }
1926       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1927       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1928       aux_primal_minloc[total_primal_vertices]=min_loc;
1929       total_primal_vertices++;
1930     }
1931   }
1932   /* determine if a QR strategy is needed for change of basis */
1933   qr_needed = PETSC_FALSE;
1934   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1935   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1936     if (PetscBTLookup(change_basis,i)) {
1937       if (!pcbddc->use_qr_single) {
1938         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1939         j = 0;
1940         for (k=0;k<size_of_constraint;k++) {
1941           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1942             j++;
1943           }
1944         }
1945         /* found more than one primal dof on the cc */
1946         if (j > 1) {
1947           PetscBTSet(qr_needed_idx,i);
1948           qr_needed = PETSC_TRUE;
1949         }
1950       } else {
1951         PetscBTSet(qr_needed_idx,i);
1952         qr_needed = PETSC_TRUE;
1953       }
1954     }
1955   }
1956   /* free workspace */
1957   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1958 
1959   /* permute indices in order to have a sorted set of vertices */
1960   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1961 
1962   /* nonzero structure of constraint matrix */
1963   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1964   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1965   j=total_primal_vertices;
1966   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1967     if (!PetscBTLookup(change_basis,i)) {
1968       nnz[j]=temp_indices[i+1]-temp_indices[i];
1969       j++;
1970     }
1971   }
1972   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1973   ierr = PetscFree(nnz);CHKERRQ(ierr);
1974   /* set values in constraint matrix */
1975   for (i=0;i<total_primal_vertices;i++) {
1976     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1977   }
1978   total_counts = total_primal_vertices;
1979   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1980     if (!PetscBTLookup(change_basis,i)) {
1981       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1982       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1983       total_counts++;
1984     }
1985   }
1986   /* assembling */
1987   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1989   /*
1990   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1991   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1992   */
1993   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1994   if (pcbddc->use_change_of_basis) {
1995     /* dual and primal dofs on a single cc */
1996     PetscInt     dual_dofs,primal_dofs;
1997     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1998     PetscInt     primal_counter;
1999     /* working stuff for GEQRF */
2000     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2001     PetscBLASInt lqr_work;
2002     /* working stuff for UNGQR */
2003     PetscScalar  *gqr_work,lgqr_work_t;
2004     PetscBLASInt lgqr_work;
2005     /* working stuff for TRTRS */
2006     PetscScalar  *trs_rhs;
2007     PetscBLASInt Blas_NRHS;
2008     /* pointers for values insertion into change of basis matrix */
2009     PetscInt     *start_rows,*start_cols;
2010     PetscScalar  *start_vals;
2011     /* working stuff for values insertion */
2012     PetscBT      is_primal;
2013     /* matrix sizes */
2014     PetscInt     global_size,local_size;
2015     /* work array for nonzeros */
2016     PetscScalar  *nnz_array;
2017     /* temporary change of basis */
2018     Mat          localChangeOfBasisMatrix;
2019     /* auxiliary work for global change of basis */
2020     Vec          nnz_vec;
2021     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
2022     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
2023     PetscScalar  *vals;
2024     PetscBool    done;
2025 
2026     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2027     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2028     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2029     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2030 
2031     /* nonzeros for local mat */
2032     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2033     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2034     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2035       if (PetscBTLookup(change_basis,i)) {
2036         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2037         if (PetscBTLookup(qr_needed_idx,i)) {
2038           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2039         } else {
2040           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2041           /* get local primal index on the cc */
2042           j = 0;
2043           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2044           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2045         }
2046       }
2047     }
2048     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2049     /* Set initial identity in the matrix */
2050     for (i=0;i<pcis->n_B;i++) {
2051       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2052     }
2053 
2054     if (pcbddc->dbg_flag) {
2055       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2056       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2057     }
2058 
2059 
2060     /* Now we loop on the constraints which need a change of basis */
2061     /*
2062        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2063        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2064 
2065        Basic blocks of change of basis matrix T computed by
2066 
2067           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2068 
2069             | 1        0   ...        0         s_1/S |
2070             | 0        1   ...        0         s_2/S |
2071             |              ...                        |
2072             | 0        ...            1     s_{n-1}/S |
2073             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2074 
2075             with S = \sum_{i=1}^n s_i^2
2076             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2077                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2078 
2079           - QR decomposition of constraints otherwise
2080     */
2081     if (qr_needed) {
2082       /* space to store Q */
2083       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2084       /* first we issue queries for optimal work */
2085       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2086       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2087       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2088       lqr_work = -1;
2089       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2090       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2091       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2092       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2093       lgqr_work = -1;
2094       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2095       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2096       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2097       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2098       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2099       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2100       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2101       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2102       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2103       /* array to store scaling factors for reflectors */
2104       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2105       /* array to store rhs and solution of triangular solver */
2106       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2107       /* allocating workspace for check */
2108       if (pcbddc->dbg_flag) {
2109         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2110       }
2111     }
2112     /* array to store whether a node is primal or not */
2113     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2114     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2115     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2116     if (i != total_primal_vertices) {
2117       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2118     }
2119     for (i=0;i<total_primal_vertices;i++) {
2120       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2121     }
2122     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2123 
2124     /* loop on constraints and see whether or not they need a change of basis and compute it */
2125     /* -> using implicit ordering contained in temp_indices data */
2126     total_counts = pcbddc->n_vertices;
2127     primal_counter = total_counts;
2128     while (total_counts<pcbddc->local_primal_size) {
2129       primal_dofs = 1;
2130       if (PetscBTLookup(change_basis,total_counts)) {
2131         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2132         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2133           primal_dofs++;
2134         }
2135         /* get constraint info */
2136         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2137         dual_dofs = size_of_constraint-primal_dofs;
2138 
2139         if (pcbddc->dbg_flag) {
2140           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2141         }
2142 
2143         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2144 
2145           /* copy quadrature constraints for change of basis check */
2146           if (pcbddc->dbg_flag) {
2147             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2148           }
2149           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2150           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2151 
2152           /* compute QR decomposition of constraints */
2153           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2154           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2155           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2156           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2157           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2158           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2159           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2160 
2161           /* explictly compute R^-T */
2162           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2163           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2164           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2165           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2166           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2167           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2168           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2169           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2170           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2171           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2172 
2173           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2174           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2175           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2176           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2177           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2178           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2179           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2180           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2181           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2182 
2183           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2184              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2185              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2186           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2187           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2188           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2189           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2190           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2191           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2192           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2193           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2194           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2195           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2196 
2197           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2198           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2199           /* insert cols for primal dofs */
2200           for (j=0;j<primal_dofs;j++) {
2201             start_vals = &qr_basis[j*size_of_constraint];
2202             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2203             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2204           }
2205           /* insert cols for dual dofs */
2206           for (j=0,k=0;j<dual_dofs;k++) {
2207             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2208               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2209               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2210               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2211               j++;
2212             }
2213           }
2214 
2215           /* check change of basis */
2216           if (pcbddc->dbg_flag) {
2217             PetscInt   ii,jj;
2218             PetscBool valid_qr=PETSC_TRUE;
2219             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2220             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2221             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2222             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2223             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2224             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2225             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2226             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2227             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2228             for (jj=0;jj<size_of_constraint;jj++) {
2229               for (ii=0;ii<primal_dofs;ii++) {
2230                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2231                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2232               }
2233             }
2234             if (!valid_qr) {
2235               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2236               for (jj=0;jj<size_of_constraint;jj++) {
2237                 for (ii=0;ii<primal_dofs;ii++) {
2238                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2239                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2240                   }
2241                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2242                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2243                   }
2244                 }
2245               }
2246             } else {
2247               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2248             }
2249           }
2250         } else { /* simple transformation block */
2251           PetscInt    row,col;
2252           PetscScalar val,norm;
2253 
2254           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2255           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2256           for (j=0;j<size_of_constraint;j++) {
2257             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2258             if (!PetscBTLookup(is_primal,row)) {
2259               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2260               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2261               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2262             } else {
2263               for (k=0;k<size_of_constraint;k++) {
2264                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2265                 if (row != col) {
2266                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2267                 } else {
2268                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2269                 }
2270                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2271               }
2272             }
2273           }
2274           if (pcbddc->dbg_flag) {
2275             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2276           }
2277         }
2278         /* increment primal counter */
2279         primal_counter += primal_dofs;
2280       } else {
2281         if (pcbddc->dbg_flag) {
2282           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2283         }
2284       }
2285       /* increment constraint counter total_counts */
2286       total_counts += primal_dofs;
2287     }
2288 
2289     /* free workspace */
2290     if (qr_needed) {
2291       if (pcbddc->dbg_flag) {
2292         ierr = PetscFree(work);CHKERRQ(ierr);
2293       }
2294       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2295       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2296       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2297       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2298       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2299     }
2300     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2301     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2302     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2303 
2304     /* assembling of global change of variable */
2305     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2306     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2307     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2308     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2309     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2310     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2311 
2312     /* nonzeros (overestimated) */
2313     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2314     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2315     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2316     for (i=0;i<pcis->n;i++) {
2317       nnz_array[i] = 1.0;
2318       idxs_all[i] = i;
2319     }
2320     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2321     for (i=0;i<pcis->n_B;i++) {
2322       nnz_array[idxs_B[i]] = nnz[i];
2323     }
2324     if (pcis->n) {
2325       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2326     }
2327     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2328     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2329     ierr = PetscFree(nnz);CHKERRQ(ierr);
2330     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2331     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2332     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2333     for (i=0;i<local_size;i++) {
2334       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2335       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2336     }
2337     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2338     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2339     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2340     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2341 
2342     /* Set identity on dirichlet dofs */
2343     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2344     for (i=0;i<pcis->n-pcis->n_B;i++) {
2345       PetscScalar one=1.0;
2346       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2347     }
2348     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2349 
2350     /* Set values at interface dofs */
2351     done = PETSC_TRUE;
2352     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2353     if (!done) {
2354       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2355     }
2356     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2357     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2358     ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2359     for (i=0;i<nvtxs;i++) {
2360       PetscInt    row,*cols,ncols;
2361       PetscScalar *mat_vals;
2362 
2363       row = idxs_B[i];
2364       ncols = xadj[i+1]-xadj[i];
2365       cols = idxs_mapped+xadj[i];
2366       mat_vals = vals+xadj[i];
2367       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2368     }
2369     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2370     if (!done) {
2371       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2372     }
2373     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2374     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2375     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2376     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2377     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2378 
2379     /* check */
2380     if (pcbddc->dbg_flag) {
2381       PetscReal error;
2382       Vec       x,x_change;
2383 
2384       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2385       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2386       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2387       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2388       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2389       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2390       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2391       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2392       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2393       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2394       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2395       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2396       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2397       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2398       ierr = VecDestroy(&x);CHKERRQ(ierr);
2399       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2400     }
2401     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2402   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2403     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2404     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2405   }
2406 
2407   /* set up change of basis context */
2408   if (pcbddc->ChangeOfBasisMatrix) {
2409     PCBDDCChange_ctx change_ctx;
2410 
2411     if (!pcbddc->new_global_mat) {
2412       PetscInt global_size,local_size;
2413 
2414       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2415       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2416       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2417       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2418       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2419       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2420       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2421       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2422       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2423     } else {
2424       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2425       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2426       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2427     }
2428     if (!pcbddc->user_ChangeOfBasisMatrix) {
2429       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2430       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2431     } else {
2432       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2433       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2434     }
2435     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2436     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2437     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2438     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2439   }
2440 
2441   /* get indices in local ordering for vertices and constraints */
2442   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2443     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2444     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2445   }
2446   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2447   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2448   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2449   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2450   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2451   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2452   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2453   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2454   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2455   /* set quantities in PCBDDC data struct */
2456   pcbddc->n_actual_vertices = i;
2457   /* check if a new primal space has been introduced */
2458   pcbddc->new_primal_space_local = PETSC_TRUE;
2459   if (olocal_primal_size == pcbddc->local_primal_size) {
2460     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2461     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2462     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2463   }
2464   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2465   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2466 
2467   /* flush dbg viewer */
2468   if (pcbddc->dbg_flag) {
2469     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2470   }
2471 
2472   /* free workspace */
2473   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2474   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2475   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2476   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2477   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2478   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2484 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2485 {
2486   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2487   PC_IS       *pcis = (PC_IS*)pc->data;
2488   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2489   PetscInt    ierr,i,vertex_size;
2490   PetscViewer viewer=pcbddc->dbg_viewer;
2491 
2492   PetscFunctionBegin;
2493   /* Reset previously computed graph */
2494   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2495   /* Init local Graph struct */
2496   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2497 
2498   /* Check validity of the csr graph passed in by the user */
2499   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2500     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2501   }
2502 
2503   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2504   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2505     Mat       mat_adj;
2506     PetscInt  *xadj,*adjncy;
2507     PetscInt  nvtxs;
2508     PetscBool flg_row=PETSC_TRUE;
2509 
2510     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2511     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2512     if (!flg_row) {
2513       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2514     }
2515     if (pcbddc->use_local_adj) {
2516       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2517       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2518     } else { /* just compute subdomain's connected components */
2519       IS                     is_dummy;
2520       ISLocalToGlobalMapping l2gmap_dummy;
2521       PetscInt               j,sum;
2522       PetscInt               *cxadj,*cadjncy;
2523       const PetscInt         *idxs;
2524       PCBDDCGraph            graph;
2525       PetscBT                is_on_boundary;
2526 
2527       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2528       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2529       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2530       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2531       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2532       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2533       graph->xadj = xadj;
2534       graph->adjncy = adjncy;
2535       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2536       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2537 
2538       if (pcbddc->dbg_flag) {
2539         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2540         for (i=0;i<graph->ncc;i++) {
2541           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2542         }
2543         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2544       }
2545 
2546       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2547       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2548       for (i=0;i<pcis->n_B;i++) {
2549         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2550       }
2551       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2552 
2553       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2554       sum = 0;
2555       for (i=0;i<graph->ncc;i++) {
2556         PetscInt sizecc = 0;
2557         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2558           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2559             sizecc++;
2560           }
2561         }
2562         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2563           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2564             cxadj[graph->queue[j]] = sizecc;
2565           }
2566         }
2567         sum += sizecc*sizecc;
2568       }
2569       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2570       sum = 0;
2571       for (i=0;i<nvtxs;i++) {
2572         PetscInt temp = cxadj[i];
2573         cxadj[i] = sum;
2574         sum += temp;
2575       }
2576       cxadj[nvtxs] = sum;
2577       for (i=0;i<graph->ncc;i++) {
2578         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2579           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2580             PetscInt k,sizecc = 0;
2581             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2582               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2583                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2584                 sizecc++;
2585               }
2586             }
2587           }
2588         }
2589       }
2590       if (nvtxs) {
2591         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2592       } else {
2593         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2594         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2595       }
2596       graph->xadj = 0;
2597       graph->adjncy = 0;
2598       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2599       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2600     }
2601     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2602     if (!flg_row) {
2603       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2604     }
2605     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2606   }
2607 
2608   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2609   vertex_size = 1;
2610   if (pcbddc->user_provided_isfordofs) {
2611     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2612       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2613       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2614         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2615         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2616       }
2617       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2618       pcbddc->n_ISForDofs = 0;
2619       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2620     }
2621     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2622     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2623   } else {
2624     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2625       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2626       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2627       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2628         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2629       }
2630     }
2631   }
2632 
2633   /* Setup of Graph */
2634   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2635     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2636   }
2637   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2638     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2639   }
2640   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2641 
2642   /* Graph's connected components analysis */
2643   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2644 
2645   /* print some info to stdout */
2646   if (pcbddc->dbg_flag) {
2647     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2648   }
2649 
2650   /* mark topography has done */
2651   pcbddc->recompute_topography = PETSC_FALSE;
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 #undef __FUNCT__
2656 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2657 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2658 {
2659   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2660   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2661   PetscErrorCode ierr;
2662 
2663   PetscFunctionBegin;
2664   n = 0;
2665   vertices = 0;
2666   if (pcbddc->ConstraintMatrix) {
2667     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2668     for (i=0;i<local_primal_size;i++) {
2669       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2670       if (size_of_constraint == 1) n++;
2671       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2672     }
2673     if (vertices_idx) {
2674       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2675       n = 0;
2676       for (i=0;i<local_primal_size;i++) {
2677         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2678         if (size_of_constraint == 1) {
2679           vertices[n++]=row_cmat_indices[0];
2680         }
2681         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2682       }
2683     }
2684   }
2685   *n_vertices = n;
2686   if (vertices_idx) *vertices_idx = vertices;
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #undef __FUNCT__
2691 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2692 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2693 {
2694   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2695   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2696   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2697   PetscBT        touched;
2698   PetscErrorCode ierr;
2699 
2700     /* This function assumes that the number of local constraints per connected component
2701        is not greater than the number of nodes defined for the connected component
2702        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2703   PetscFunctionBegin;
2704   n = 0;
2705   constraints_index = 0;
2706   if (pcbddc->ConstraintMatrix) {
2707     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2708     max_size_of_constraint = 0;
2709     for (i=0;i<local_primal_size;i++) {
2710       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2711       if (size_of_constraint > 1) {
2712         n++;
2713       }
2714       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2715       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2716     }
2717     if (constraints_idx) {
2718       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2719       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2720       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2721       n = 0;
2722       for (i=0;i<local_primal_size;i++) {
2723         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2724         if (size_of_constraint > 1) {
2725           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2726           /* find first untouched local node */
2727           j = 0;
2728           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2729           min_index = row_cmat_global_indices[j];
2730           min_loc = j;
2731           /* search the minimum among nodes not yet touched on the connected component
2732              since there can be more than one constraint on a single cc */
2733           for (j=1;j<size_of_constraint;j++) {
2734             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2735               min_index = row_cmat_global_indices[j];
2736               min_loc = j;
2737             }
2738           }
2739           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2740           constraints_index[n++] = row_cmat_indices[min_loc];
2741         }
2742         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2743       }
2744       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2745       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2746     }
2747   }
2748   *n_constraints = n;
2749   if (constraints_idx) *constraints_idx = constraints_index;
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "PCBDDCSubsetNumbering"
2755 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2756 {
2757   Vec            local_vec,global_vec;
2758   IS             seqis,paris;
2759   VecScatter     scatter_ctx;
2760   PetscScalar    *array;
2761   PetscInt       *temp_global_dofs;
2762   PetscScalar    globalsum;
2763   PetscInt       i,j,s;
2764   PetscInt       nlocals,first_index,old_index,max_local;
2765   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2766   PetscMPIInt    *dof_sizes,*dof_displs;
2767   PetscBool      first_found;
2768   PetscErrorCode ierr;
2769 
2770   PetscFunctionBegin;
2771   /* mpi buffers */
2772   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2773   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2774   j = ( !rank_prec_comm ? size_prec_comm : 0);
2775   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2776   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2777   /* get maximum size of subset */
2778   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2779   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2780   max_local = 0;
2781   for (i=0;i<n_local_dofs;i++) {
2782     if (max_local < temp_global_dofs[i] ) {
2783       max_local = temp_global_dofs[i];
2784     }
2785   }
2786   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2787   max_global++;
2788   max_local = 0;
2789   for (i=0;i<n_local_dofs;i++) {
2790     if (max_local < local_dofs[i] ) {
2791       max_local = local_dofs[i];
2792     }
2793   }
2794   max_local++;
2795   /* allocate workspace */
2796   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2797   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2798   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2799   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2800   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2801   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2802   /* create scatter */
2803   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2804   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2805   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2806   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2807   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2808   /* init array */
2809   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2810   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2811   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2812   if (local_dofs_mult) {
2813     for (i=0;i<n_local_dofs;i++) {
2814       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2815     }
2816   } else {
2817     for (i=0;i<n_local_dofs;i++) {
2818       array[local_dofs[i]]=1.0;
2819     }
2820   }
2821   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2822   /* scatter into global vec and get total number of global dofs */
2823   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2824   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2825   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2826   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2827   /* Fill global_vec with cumulative function for global numbering */
2828   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2829   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2830   nlocals = 0;
2831   first_index = -1;
2832   first_found = PETSC_FALSE;
2833   for (i=0;i<s;i++) {
2834     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2835       first_found = PETSC_TRUE;
2836       first_index = i;
2837     }
2838     nlocals += (PetscInt)PetscRealPart(array[i]);
2839   }
2840   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2841   if (!rank_prec_comm) {
2842     dof_displs[0]=0;
2843     for (i=1;i<size_prec_comm;i++) {
2844       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2845     }
2846   }
2847   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2848   if (first_found) {
2849     array[first_index] += (PetscScalar)nlocals;
2850     old_index = first_index;
2851     for (i=first_index+1;i<s;i++) {
2852       if (PetscRealPart(array[i]) > 0.1) {
2853         array[i] += array[old_index];
2854         old_index = i;
2855       }
2856     }
2857   }
2858   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2859   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2860   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2861   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2862   /* get global ordering of local dofs */
2863   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2864   if (local_dofs_mult) {
2865     for (i=0;i<n_local_dofs;i++) {
2866       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2867     }
2868   } else {
2869     for (i=0;i<n_local_dofs;i++) {
2870       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2871     }
2872   }
2873   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2874   /* free workspace */
2875   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2876   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2877   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2878   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2879   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2880   /* return pointer to global ordering of local dofs */
2881   *global_numbering_subset = temp_global_dofs;
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2887 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2888 {
2889   PetscInt       i,j;
2890   PetscScalar    *alphas;
2891   PetscErrorCode ierr;
2892 
2893   PetscFunctionBegin;
2894   /* this implements stabilized Gram-Schmidt */
2895   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2896   for (i=0;i<n;i++) {
2897     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2898     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2899     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2900   }
2901   ierr = PetscFree(alphas);CHKERRQ(ierr);
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatISGetSubassemblingPattern"
2907 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2908 {
2909   Mat             subdomain_adj;
2910   IS              new_ranks,ranks_send_to;
2911   MatPartitioning partitioner;
2912   Mat_IS          *matis;
2913   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2914   PetscInt        prank;
2915   PetscMPIInt     size,rank,color;
2916   PetscInt        *xadj,*adjncy,*oldranks;
2917   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2918   PetscInt        i,local_size,threshold=0;
2919   PetscErrorCode  ierr;
2920   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2921   PetscSubcomm    subcomm;
2922 
2923   PetscFunctionBegin;
2924   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2925   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2926   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2927 
2928   /* Get info on mapping */
2929   matis = (Mat_IS*)(mat->data);
2930   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2931   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2932 
2933   /* build local CSR graph of subdomains' connectivity */
2934   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2935   xadj[0] = 0;
2936   xadj[1] = PetscMax(n_neighs-1,0);
2937   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2938   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2939 
2940   if (threshold) {
2941     PetscInt xadj_count = 0;
2942     for (i=1;i<n_neighs;i++) {
2943       if (n_shared[i] > threshold) {
2944         adjncy[xadj_count] = neighs[i];
2945         adjncy_wgt[xadj_count] = n_shared[i];
2946         xadj_count++;
2947       }
2948     }
2949     xadj[1] = xadj_count;
2950   } else {
2951     if (xadj[1]) {
2952       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2953       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2954     }
2955   }
2956   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2957   if (use_square) {
2958     for (i=0;i<xadj[1];i++) {
2959       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2960     }
2961   }
2962   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2963 
2964   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2965 
2966   /*
2967     Restrict work on active processes only.
2968   */
2969   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2970   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2971   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2972   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2973   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2974   if (color) {
2975     ierr = PetscFree(xadj);CHKERRQ(ierr);
2976     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2977     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2978   } else {
2979     PetscInt coarsening_ratio;
2980     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2981     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2982     prank = rank;
2983     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2984     /*
2985     for (i=0;i<size;i++) {
2986       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2987     }
2988     */
2989     for (i=0;i<xadj[1];i++) {
2990       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2991     }
2992     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2993     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2994     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2995 
2996     /* Partition */
2997     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2998     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2999     if (use_vwgt) {
3000       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3001       v_wgt[0] = local_size;
3002       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3003     }
3004     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3005     coarsening_ratio = size/n_subdomains;
3006     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3007     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3008     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3009     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3010 
3011     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3012     if (contiguous) {
3013       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3014     } else {
3015       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3016     }
3017     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3018     /* clean up */
3019     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3020     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3021     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3022     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3023   }
3024   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3025 
3026   /* assemble parallel IS for sends */
3027   i = 1;
3028   if (color) i=0;
3029   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3030 
3031   /* get back IS */
3032   *is_sends = ranks_send_to;
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3037 
3038 #undef __FUNCT__
3039 #define __FUNCT__ "MatISSubassemble"
3040 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3041 {
3042   Mat                    local_mat;
3043   Mat_IS                 *matis;
3044   IS                     is_sends_internal;
3045   PetscInt               rows,cols,new_local_rows;
3046   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3047   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3048   ISLocalToGlobalMapping l2gmap;
3049   PetscInt*              l2gmap_indices;
3050   const PetscInt*        is_indices;
3051   MatType                new_local_type;
3052   /* buffers */
3053   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3054   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3055   PetscInt               *recv_buffer_idxs_local;
3056   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3057   /* MPI */
3058   MPI_Comm               comm,comm_n;
3059   PetscSubcomm           subcomm;
3060   PetscMPIInt            n_sends,n_recvs,commsize;
3061   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3062   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3063   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3064   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3065   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3066   PetscErrorCode         ierr;
3067 
3068   PetscFunctionBegin;
3069   /* TODO: add missing checks */
3070   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3071   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3072   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3073   PetscValidLogicalCollectiveInt(mat,nis,7);
3074   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3075   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3076   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3077   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3078   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3079   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3080   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3081   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3082     PetscInt mrows,mcols,mnrows,mncols;
3083     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3084     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3085     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3086     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3087     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3088     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3089   }
3090   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3091   PetscValidLogicalCollectiveInt(mat,bs,0);
3092   /* prepare IS for sending if not provided */
3093   if (!is_sends) {
3094     PetscBool pcontig = PETSC_TRUE;
3095     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3096     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3097   } else {
3098     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3099     is_sends_internal = is_sends;
3100   }
3101 
3102   /* get pointer of MATIS data */
3103   matis = (Mat_IS*)mat->data;
3104 
3105   /* get comm */
3106   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3107 
3108   /* compute number of sends */
3109   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3110   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3111 
3112   /* compute number of receives */
3113   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3114   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3115   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3116   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3117   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3118   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3119   ierr = PetscFree(iflags);CHKERRQ(ierr);
3120 
3121   /* restrict comm if requested */
3122   subcomm = 0;
3123   destroy_mat = PETSC_FALSE;
3124   if (restrict_comm) {
3125     PetscMPIInt color,rank,subcommsize;
3126     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3127     color = 0;
3128     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3129     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3130     subcommsize = commsize - subcommsize;
3131     /* check if reuse has been requested */
3132     if (reuse == MAT_REUSE_MATRIX) {
3133       if (*mat_n) {
3134         PetscMPIInt subcommsize2;
3135         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3136         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3137         comm_n = PetscObjectComm((PetscObject)*mat_n);
3138       } else {
3139         comm_n = PETSC_COMM_SELF;
3140       }
3141     } else { /* MAT_INITIAL_MATRIX */
3142       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3143       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3144       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3145       comm_n = subcomm->comm;
3146     }
3147     /* flag to destroy *mat_n if not significative */
3148     if (color) destroy_mat = PETSC_TRUE;
3149   } else {
3150     comm_n = comm;
3151   }
3152 
3153   /* prepare send/receive buffers */
3154   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3155   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3156   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3157   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3158   if (nis) {
3159     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3160   }
3161 
3162   /* Get data from local matrices */
3163   if (!isdense) {
3164     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3165     /* TODO: See below some guidelines on how to prepare the local buffers */
3166     /*
3167        send_buffer_vals should contain the raw values of the local matrix
3168        send_buffer_idxs should contain:
3169        - MatType_PRIVATE type
3170        - PetscInt        size_of_l2gmap
3171        - PetscInt        global_row_indices[size_of_l2gmap]
3172        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3173     */
3174   } else {
3175     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3176     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3177     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3178     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3179     send_buffer_idxs[1] = i;
3180     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3181     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3182     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3183     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3184     for (i=0;i<n_sends;i++) {
3185       ilengths_vals[is_indices[i]] = len*len;
3186       ilengths_idxs[is_indices[i]] = len+2;
3187     }
3188   }
3189   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3190   /* additional is (if any) */
3191   if (nis) {
3192     PetscMPIInt psum;
3193     PetscInt j;
3194     for (j=0,psum=0;j<nis;j++) {
3195       PetscInt plen;
3196       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3197       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3198       psum += len+1; /* indices + lenght */
3199     }
3200     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3201     for (j=0,psum=0;j<nis;j++) {
3202       PetscInt plen;
3203       const PetscInt *is_array_idxs;
3204       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3205       send_buffer_idxs_is[psum] = plen;
3206       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3207       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3208       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3209       psum += plen+1; /* indices + lenght */
3210     }
3211     for (i=0;i<n_sends;i++) {
3212       ilengths_idxs_is[is_indices[i]] = psum;
3213     }
3214     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3215   }
3216 
3217   buf_size_idxs = 0;
3218   buf_size_vals = 0;
3219   buf_size_idxs_is = 0;
3220   for (i=0;i<n_recvs;i++) {
3221     buf_size_idxs += (PetscInt)olengths_idxs[i];
3222     buf_size_vals += (PetscInt)olengths_vals[i];
3223     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3224   }
3225   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3226   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3227   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3228 
3229   /* get new tags for clean communications */
3230   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3231   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3232   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3233 
3234   /* allocate for requests */
3235   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3236   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3237   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3238   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3239   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3240   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3241 
3242   /* communications */
3243   ptr_idxs = recv_buffer_idxs;
3244   ptr_vals = recv_buffer_vals;
3245   ptr_idxs_is = recv_buffer_idxs_is;
3246   for (i=0;i<n_recvs;i++) {
3247     source_dest = onodes[i];
3248     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3249     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3250     ptr_idxs += olengths_idxs[i];
3251     ptr_vals += olengths_vals[i];
3252     if (nis) {
3253       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3254       ptr_idxs_is += olengths_idxs_is[i];
3255     }
3256   }
3257   for (i=0;i<n_sends;i++) {
3258     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3259     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3260     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3261     if (nis) {
3262       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3263     }
3264   }
3265   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3266   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3267 
3268   /* assemble new l2g map */
3269   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3270   ptr_idxs = recv_buffer_idxs;
3271   new_local_rows = 0;
3272   for (i=0;i<n_recvs;i++) {
3273     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3274     ptr_idxs += olengths_idxs[i];
3275   }
3276   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3277   ptr_idxs = recv_buffer_idxs;
3278   new_local_rows = 0;
3279   for (i=0;i<n_recvs;i++) {
3280     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3281     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3282     ptr_idxs += olengths_idxs[i];
3283   }
3284   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3285   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3286   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3287 
3288   /* infer new local matrix type from received local matrices type */
3289   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3290   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3291   if (n_recvs) {
3292     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3293     ptr_idxs = recv_buffer_idxs;
3294     for (i=0;i<n_recvs;i++) {
3295       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3296         new_local_type_private = MATAIJ_PRIVATE;
3297         break;
3298       }
3299       ptr_idxs += olengths_idxs[i];
3300     }
3301     switch (new_local_type_private) {
3302       case MATDENSE_PRIVATE:
3303         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3304           new_local_type = MATSEQAIJ;
3305           bs = 1;
3306         } else { /* if I receive only 1 dense matrix */
3307           new_local_type = MATSEQDENSE;
3308           bs = 1;
3309         }
3310         break;
3311       case MATAIJ_PRIVATE:
3312         new_local_type = MATSEQAIJ;
3313         bs = 1;
3314         break;
3315       case MATBAIJ_PRIVATE:
3316         new_local_type = MATSEQBAIJ;
3317         break;
3318       case MATSBAIJ_PRIVATE:
3319         new_local_type = MATSEQSBAIJ;
3320         break;
3321       default:
3322         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3323         break;
3324     }
3325   } else { /* by default, new_local_type is seqdense */
3326     new_local_type = MATSEQDENSE;
3327     bs = 1;
3328   }
3329 
3330   /* create MATIS object if needed */
3331   if (reuse == MAT_INITIAL_MATRIX) {
3332     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3333     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3334   } else {
3335     /* it also destroys the local matrices */
3336     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3337   }
3338   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3339   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3340 
3341   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3342 
3343   /* Global to local map of received indices */
3344   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3345   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3346   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3347 
3348   /* restore attributes -> type of incoming data and its size */
3349   buf_size_idxs = 0;
3350   for (i=0;i<n_recvs;i++) {
3351     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3352     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3353     buf_size_idxs += (PetscInt)olengths_idxs[i];
3354   }
3355   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3356 
3357   /* set preallocation */
3358   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3359   if (!newisdense) {
3360     PetscInt *new_local_nnz=0;
3361 
3362     ptr_vals = recv_buffer_vals;
3363     ptr_idxs = recv_buffer_idxs_local;
3364     if (n_recvs) {
3365       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3366     }
3367     for (i=0;i<n_recvs;i++) {
3368       PetscInt j;
3369       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3370         for (j=0;j<*(ptr_idxs+1);j++) {
3371           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3372         }
3373       } else {
3374         /* TODO */
3375       }
3376       ptr_idxs += olengths_idxs[i];
3377     }
3378     if (new_local_nnz) {
3379       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3380       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3381       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3382       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3383       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3384       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3385     } else {
3386       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3387     }
3388     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3389   } else {
3390     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3391   }
3392 
3393   /* set values */
3394   ptr_vals = recv_buffer_vals;
3395   ptr_idxs = recv_buffer_idxs_local;
3396   for (i=0;i<n_recvs;i++) {
3397     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3398       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3399       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3400       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3401       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3402       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3403     } else {
3404       /* TODO */
3405     }
3406     ptr_idxs += olengths_idxs[i];
3407     ptr_vals += olengths_vals[i];
3408   }
3409   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3410   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3411   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3413   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3414   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3415 
3416 #if 0
3417   if (!restrict_comm) { /* check */
3418     Vec       lvec,rvec;
3419     PetscReal infty_error;
3420 
3421     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3422     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3423     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3424     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3425     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3426     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3427     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3428     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3429     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3430   }
3431 #endif
3432 
3433   /* assemble new additional is (if any) */
3434   if (nis) {
3435     PetscInt **temp_idxs,*count_is,j,psum;
3436 
3437     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3438     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3439     ptr_idxs = recv_buffer_idxs_is;
3440     psum = 0;
3441     for (i=0;i<n_recvs;i++) {
3442       for (j=0;j<nis;j++) {
3443         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3444         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3445         psum += plen;
3446         ptr_idxs += plen+1; /* shift pointer to received data */
3447       }
3448     }
3449     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3450     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3451     for (i=1;i<nis;i++) {
3452       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3453     }
3454     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3455     ptr_idxs = recv_buffer_idxs_is;
3456     for (i=0;i<n_recvs;i++) {
3457       for (j=0;j<nis;j++) {
3458         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3459         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3460         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3461         ptr_idxs += plen+1; /* shift pointer to received data */
3462       }
3463     }
3464     for (i=0;i<nis;i++) {
3465       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3466       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3467       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3468     }
3469     ierr = PetscFree(count_is);CHKERRQ(ierr);
3470     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3471     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3472   }
3473   /* free workspace */
3474   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3475   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3476   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3477   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3478   if (isdense) {
3479     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3480     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3481   } else {
3482     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3483   }
3484   if (nis) {
3485     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3486     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3487   }
3488   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3489   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3490   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3491   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3492   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3493   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3494   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3495   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3496   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3497   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3498   ierr = PetscFree(onodes);CHKERRQ(ierr);
3499   if (nis) {
3500     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3501     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3502     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3503   }
3504   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3505   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3506     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3507     for (i=0;i<nis;i++) {
3508       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3509     }
3510   }
3511   PetscFunctionReturn(0);
3512 }
3513 
3514 /* temporary hack into ksp private data structure */
3515 #include <petsc-private/kspimpl.h>
3516 
3517 #undef __FUNCT__
3518 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3519 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3520 {
3521   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3522   PC_IS                  *pcis = (PC_IS*)pc->data;
3523   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3524   MatNullSpace           CoarseNullSpace=NULL;
3525   ISLocalToGlobalMapping coarse_islg;
3526   IS                     coarse_is,*isarray;
3527   PetscInt               i,im_active=-1,active_procs=-1;
3528   PetscInt               nis,nisdofs,nisneu;
3529   PC                     pc_temp;
3530   PCType                 coarse_pc_type;
3531   KSPType                coarse_ksp_type;
3532   PetscBool              multilevel_requested,multilevel_allowed;
3533   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3534   Mat                    t_coarse_mat_is;
3535   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3536   PetscMPIInt            all_procs;
3537   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3538   PetscBool              compute_vecs = PETSC_FALSE;
3539   PetscScalar            *array;
3540   PetscErrorCode         ierr;
3541 
3542   PetscFunctionBegin;
3543   /* Assign global numbering to coarse dofs */
3544   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3545     compute_vecs = PETSC_TRUE;
3546     PetscInt ocoarse_size;
3547     ocoarse_size = pcbddc->coarse_size;
3548     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3549     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3550     /* see if we can avoid some work */
3551     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3552       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3553         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3554         coarse_reuse = PETSC_FALSE;
3555       } else { /* we can safely reuse already computed coarse matrix */
3556         coarse_reuse = PETSC_TRUE;
3557       }
3558     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3559       coarse_reuse = PETSC_FALSE;
3560     }
3561     /* reset any subassembling information */
3562     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3563     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3564   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3565     coarse_reuse = PETSC_TRUE;
3566   }
3567 
3568   /* count "active" (i.e. with positive local size) and "void" processes */
3569   im_active = !!(pcis->n);
3570   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3571   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3572   void_procs = all_procs-active_procs;
3573   csin_type_simple = PETSC_TRUE;
3574   redist = PETSC_FALSE;
3575   if (pcbddc->current_level && void_procs) {
3576     csin_ml = PETSC_TRUE;
3577     ncoarse_ml = void_procs;
3578     csin_ds = PETSC_TRUE;
3579     ncoarse_ds = void_procs;
3580   } else {
3581     csin_ml = PETSC_FALSE;
3582     ncoarse_ml = all_procs;
3583     if (void_procs) {
3584       csin_ds = PETSC_TRUE;
3585       ncoarse_ds = void_procs;
3586       csin_type_simple = PETSC_FALSE;
3587     } else {
3588       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3589         csin_ds = PETSC_TRUE;
3590         ncoarse_ds = pcbddc->redistribute_coarse;
3591         redist = PETSC_TRUE;
3592       } else {
3593         csin_ds = PETSC_FALSE;
3594         ncoarse_ds = all_procs;
3595       }
3596     }
3597   }
3598 
3599   /*
3600     test if we can go multilevel: three conditions must be satisfied:
3601     - we have not exceeded the number of levels requested
3602     - we can actually subassemble the active processes
3603     - we can find a suitable number of MPI processes where we can place the subassembled problem
3604   */
3605   multilevel_allowed = PETSC_FALSE;
3606   multilevel_requested = PETSC_FALSE;
3607   if (pcbddc->current_level < pcbddc->max_levels) {
3608     multilevel_requested = PETSC_TRUE;
3609     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3610       multilevel_allowed = PETSC_FALSE;
3611     } else {
3612       multilevel_allowed = PETSC_TRUE;
3613     }
3614   }
3615   /* determine number of process partecipating to coarse solver */
3616   if (multilevel_allowed) {
3617     ncoarse = ncoarse_ml;
3618     csin = csin_ml;
3619   } else {
3620     ncoarse = ncoarse_ds;
3621     csin = csin_ds;
3622   }
3623 
3624   /* creates temporary l2gmap and IS for coarse indexes */
3625   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3626   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3627 
3628   /* creates temporary MATIS object for coarse matrix */
3629   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3630   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3631   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3632   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3633 #if 0
3634   {
3635     PetscViewer viewer;
3636     char filename[256];
3637     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3638     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3639     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3640     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3641     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3642   }
3643 #endif
3644   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3645   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3646   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3648   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3649 
3650   /* compute dofs splitting and neumann boundaries for coarse dofs */
3651   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3652     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3653     const PetscInt         *idxs;
3654     ISLocalToGlobalMapping tmap;
3655 
3656     /* create map between primal indices (in local representative ordering) and local primal numbering */
3657     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3658     /* allocate space for temporary storage */
3659     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3660     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3661     /* allocate for IS array */
3662     nisdofs = pcbddc->n_ISForDofsLocal;
3663     nisneu = !!pcbddc->NeumannBoundariesLocal;
3664     nis = nisdofs + nisneu;
3665     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3666     /* dofs splitting */
3667     for (i=0;i<nisdofs;i++) {
3668       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3669       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3670       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3671       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3672       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3673       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3674       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3675       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3676     }
3677     /* neumann boundaries */
3678     if (pcbddc->NeumannBoundariesLocal) {
3679       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3680       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3681       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3682       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3683       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3684       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3685       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3686       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3687     }
3688     /* free memory */
3689     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3690     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3691     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3692   } else {
3693     nis = 0;
3694     nisdofs = 0;
3695     nisneu = 0;
3696     isarray = NULL;
3697   }
3698   /* destroy no longer needed map */
3699   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3700 
3701   /* restrict on coarse candidates (if needed) */
3702   coarse_mat_is = NULL;
3703   if (csin) {
3704     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3705       if (redist) {
3706         PetscMPIInt rank;
3707         PetscInt spc,n_spc_p1,dest[1];
3708 
3709         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3710         spc = all_procs/pcbddc->redistribute_coarse;
3711         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3712         if (rank > n_spc_p1*(spc+1)-1) {
3713           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3714         } else {
3715           dest[0] = rank/(spc+1);
3716         }
3717         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3718       } else {
3719         PetscInt j,tissize,*nisindices;
3720         PetscInt *coarse_candidates;
3721         const PetscInt* tisindices;
3722         /* get coarse candidates' ranks in pc communicator */
3723         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3724         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3725         for (i=0,j=0;i<all_procs;i++) {
3726           if (!coarse_candidates[i]) {
3727             coarse_candidates[j]=i;
3728             j++;
3729           }
3730         }
3731         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3732         /* get a suitable subassembling pattern */
3733         if (csin_type_simple) {
3734           PetscMPIInt rank;
3735           PetscInt    issize,isidx;
3736           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3737           if (im_active) {
3738             issize = 1;
3739             isidx = (PetscInt)rank;
3740           } else {
3741             issize = 0;
3742             isidx = -1;
3743           }
3744           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3745         } else {
3746           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3747         }
3748         if (pcbddc->dbg_flag) {
3749           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3750           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3751           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3752           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3753           for (i=0;i<j;i++) {
3754             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3755           }
3756           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3757           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3758         }
3759         /* shift the pattern on coarse candidates */
3760         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3761         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3762         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3763         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3764         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3765         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3766         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3767       }
3768     }
3769     if (pcbddc->dbg_flag) {
3770       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3771       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3772       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3773       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3774     }
3775     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3776     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3777   } else {
3778     if (pcbddc->dbg_flag) {
3779       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3780       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3781       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3782     }
3783     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3784     coarse_mat_is = t_coarse_mat_is;
3785   }
3786 
3787   /* create local to global scatters for coarse problem */
3788   if (compute_vecs) {
3789     PetscInt lrows;
3790     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3791     if (coarse_mat_is) {
3792       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3793     } else {
3794       lrows = 0;
3795     }
3796     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3797     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3798     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3799     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3800     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3801   }
3802   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3803   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3804 
3805   /* set defaults for coarse KSP and PC */
3806   if (multilevel_allowed) {
3807     coarse_ksp_type = KSPRICHARDSON;
3808     coarse_pc_type = PCBDDC;
3809   } else {
3810     coarse_ksp_type = KSPPREONLY;
3811     coarse_pc_type = PCREDUNDANT;
3812   }
3813 
3814   /* print some info if requested */
3815   if (pcbddc->dbg_flag) {
3816     if (!multilevel_allowed) {
3817       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3818       if (multilevel_requested) {
3819         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3820       } else if (pcbddc->max_levels) {
3821         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3822       }
3823       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3824     }
3825   }
3826 
3827   /* create the coarse KSP object only once with defaults */
3828   if (coarse_mat_is) {
3829     MatReuse coarse_mat_reuse;
3830     PetscViewer dbg_viewer = NULL;
3831     if (pcbddc->dbg_flag) {
3832       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3833       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3834     }
3835     if (!pcbddc->coarse_ksp) {
3836       char prefix[256],str_level[16];
3837       size_t len;
3838       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3839       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3840       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3841       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3842       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3843       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3844       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3845       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3846       /* prefix */
3847       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3848       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3849       if (!pcbddc->current_level) {
3850         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3851         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3852       } else {
3853         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3854         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3855         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3856         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3857         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3858         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3859       }
3860       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3861       /* allow user customization */
3862       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3863       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3864     }
3865 
3866     /* get some info after set from options */
3867     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3868     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3869     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3870     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3871     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3872       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3873       isbddc = PETSC_FALSE;
3874     }
3875     if (isredundant) {
3876       KSP inner_ksp;
3877       PC inner_pc;
3878       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3879       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3880       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3881     }
3882 
3883     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3884     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3885     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3886     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3887     if (nisdofs) {
3888       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3889       for (i=0;i<nisdofs;i++) {
3890         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3891       }
3892     }
3893     if (nisneu) {
3894       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3895       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3896     }
3897 
3898     /* assemble coarse matrix */
3899     if (coarse_reuse) {
3900       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3901       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3902       coarse_mat_reuse = MAT_REUSE_MATRIX;
3903     } else {
3904       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3905     }
3906     if (isbddc || isnn) {
3907       if (pcbddc->coarsening_ratio > 1) {
3908         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3909           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3910           if (pcbddc->dbg_flag) {
3911             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3912             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3913             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3914             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3915           }
3916         }
3917         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3918       } else {
3919         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3920         coarse_mat = coarse_mat_is;
3921       }
3922     } else {
3923       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3924     }
3925     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3926 
3927     /* propagate symmetry info to coarse matrix */
3928     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3929     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3930 
3931     /* set operators */
3932     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3933     if (pcbddc->dbg_flag) {
3934       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3935     }
3936   } else { /* processes non partecipating to coarse solver (if any) */
3937     coarse_mat = 0;
3938   }
3939   ierr = PetscFree(isarray);CHKERRQ(ierr);
3940 #if 0
3941   {
3942     PetscViewer viewer;
3943     char filename[256];
3944     sprintf(filename,"coarse_mat.m");
3945     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3946     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3947     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3948     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3949   }
3950 #endif
3951 
3952   /* Compute coarse null space (special handling by BDDC only) */
3953   if (pcbddc->NullSpace) {
3954     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3955   }
3956 
3957   if (pcbddc->coarse_ksp) {
3958     Vec crhs,csol;
3959     PetscBool ispreonly;
3960     if (CoarseNullSpace) {
3961       if (isbddc) {
3962         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3963       } else {
3964         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3965       }
3966     }
3967     /* setup coarse ksp */
3968     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3969     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3970     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3971     /* hack */
3972     if (!csol) {
3973       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3974     }
3975     if (!crhs) {
3976       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3977     }
3978     /* Check coarse problem if in debug mode or if solving with an iterative method */
3979     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3980     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3981       KSP       check_ksp;
3982       KSPType   check_ksp_type;
3983       PC        check_pc;
3984       Vec       check_vec,coarse_vec;
3985       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3986       PetscInt  its;
3987       PetscBool compute_eigs;
3988       PetscReal *eigs_r,*eigs_c;
3989       PetscInt  neigs;
3990       const char *prefix;
3991 
3992       /* Create ksp object suitable for estimation of extreme eigenvalues */
3993       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3994       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3995       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3996       if (ispreonly) {
3997         check_ksp_type = KSPPREONLY;
3998         compute_eigs = PETSC_FALSE;
3999       } else {
4000         check_ksp_type = KSPGMRES;
4001         compute_eigs = PETSC_TRUE;
4002       }
4003       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4004       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4005       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4006       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4007       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4008       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4009       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4010       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4011       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4012       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4013       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4014       /* create random vec */
4015       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4016       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4017       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4018       if (CoarseNullSpace) {
4019         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4020       }
4021       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4022       /* solve coarse problem */
4023       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4024       if (CoarseNullSpace) {
4025         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4026       }
4027       /* set eigenvalue estimation if preonly has not been requested */
4028       if (compute_eigs) {
4029         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4030         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4031         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4032         lambda_max = eigs_r[neigs-1];
4033         lambda_min = eigs_r[0];
4034         if (pcbddc->use_coarse_estimates) {
4035           if (lambda_max>lambda_min) {
4036             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4037             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4038           }
4039         }
4040       }
4041 
4042       /* check coarse problem residual error */
4043       if (pcbddc->dbg_flag) {
4044         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4045         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4046         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4047         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4048         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4049         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4050         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4051         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4052         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4053         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4054         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4055         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4056         if (compute_eigs) {
4057           PetscReal lambda_max_s,lambda_min_s;
4058           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4059           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4060           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4061           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4062           for (i=0;i<neigs;i++) {
4063             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4064           }
4065         }
4066         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4067         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4068       }
4069       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4070       if (compute_eigs) {
4071         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4072         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4073       }
4074     }
4075   }
4076   /* print additional info */
4077   if (pcbddc->dbg_flag) {
4078     /* waits until all processes reaches this point */
4079     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4080     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4081     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4082   }
4083 
4084   /* free memory */
4085   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4086   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4087   PetscFunctionReturn(0);
4088 }
4089 
4090 #undef __FUNCT__
4091 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4092 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4093 {
4094   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4095   PC_IS*         pcis = (PC_IS*)pc->data;
4096   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4097   PetscInt       i,coarse_size;
4098   PetscInt       *local_primal_indices;
4099   PetscErrorCode ierr;
4100 
4101   PetscFunctionBegin;
4102   /* Compute global number of coarse dofs */
4103   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4104     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4105   }
4106   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4107 
4108   /* check numbering */
4109   if (pcbddc->dbg_flag) {
4110     PetscScalar coarsesum,*array;
4111     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4112 
4113     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4114     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4115     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4116     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4117     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4118     for (i=0;i<pcbddc->local_primal_size;i++) {
4119       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4120     }
4121     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4122     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4123     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4124     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4125     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4126     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4127     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4128     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4129     for (i=0;i<pcis->n;i++) {
4130       if (array[i] == 1.0) {
4131         set_error = PETSC_TRUE;
4132         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4133       }
4134     }
4135     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4136     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4137     for (i=0;i<pcis->n;i++) {
4138       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4139     }
4140     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4141     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4142     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4143     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4144     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4145     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4146     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4147       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4148       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4149       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4150       for (i=0;i<pcbddc->local_primal_size;i++) {
4151         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4152       }
4153       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4154     }
4155     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4156     if (set_error_reduced) {
4157       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4158     }
4159   }
4160   /* get back data */
4161   *coarse_size_n = coarse_size;
4162   *local_primal_indices_n = local_primal_indices;
4163   PetscFunctionReturn(0);
4164 }
4165 
4166 #undef __FUNCT__
4167 #define __FUNCT__ "PCBDDCGlobalToLocal"
4168 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4169 {
4170   IS             localis_t;
4171   PetscInt       i,lsize,*idxs,n;
4172   PetscScalar    *vals;
4173   PetscErrorCode ierr;
4174 
4175   PetscFunctionBegin;
4176   /* get indices in local ordering exploiting local to global map */
4177   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4178   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4179   for (i=0;i<lsize;i++) vals[i] = 1.0;
4180   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4181   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4182   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4183   if (idxs) { /* multilevel guard */
4184     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4185   }
4186   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4187   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4188   ierr = PetscFree(vals);CHKERRQ(ierr);
4189   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4190   /* now compute set in local ordering */
4191   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4192   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4193   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4194   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4195   for (i=0,lsize=0;i<n;i++) {
4196     if (PetscRealPart(vals[i]) > 0.5) {
4197       lsize++;
4198     }
4199   }
4200   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4201   for (i=0,lsize=0;i<n;i++) {
4202     if (PetscRealPart(vals[i]) > 0.5) {
4203       idxs[lsize++] = i;
4204     }
4205   }
4206   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4207   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4208   *localis = localis_t;
4209   PetscFunctionReturn(0);
4210 }
4211 
4212 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4213 #undef __FUNCT__
4214 #define __FUNCT__ "PCBDDCMatMult_Private"
4215 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4216 {
4217   PCBDDCChange_ctx change_ctx;
4218   PetscErrorCode   ierr;
4219 
4220   PetscFunctionBegin;
4221   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4222   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4223   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4224   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4225   PetscFunctionReturn(0);
4226 }
4227 
4228 #undef __FUNCT__
4229 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4230 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4231 {
4232   PCBDDCChange_ctx change_ctx;
4233   PetscErrorCode   ierr;
4234 
4235   PetscFunctionBegin;
4236   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4237   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4238   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4239   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4240   PetscFunctionReturn(0);
4241 }
4242