xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision a773dcb89ba99d3dee7bbc2b7efd410358a2ee9e)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Compute matrix after change of basis and extract local submatrices */
18   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D */
21   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
22   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
23 
24   /* Setup local solvers ksp_D and ksp_R */
25   /* PCBDDCSetUpLocalScatters should be called first! */
26   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
27 
28   /* Change global null space passed in by the user if change of basis has been requested */
29   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
30     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
31   }
32 
33   /*
34      Setup local correction and local part of coarse basis.
35      Gives back the dense local part of the coarse matrix in column major ordering
36   */
37   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* Compute total number of coarse nodes and setup coarse solver */
40   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
41 
42   /* free */
43   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 #undef __FUNCT__
48 #define __FUNCT__ "PCBDDCResetCustomization"
49 PetscErrorCode PCBDDCResetCustomization(PC pc)
50 {
51   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
61   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
62   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
63   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
64   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
65   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "PCBDDCResetTopography"
71 PetscErrorCode PCBDDCResetTopography(PC pc)
72 {
73   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
78   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
79   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
80   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
81   PetscFunctionReturn(0);
82 }
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "PCBDDCResetSolvers"
86 PetscErrorCode PCBDDCResetSolvers(PC pc)
87 {
88   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
93   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
101   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
109   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
110   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
111   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
113   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
114   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
115   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
122 {
123   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
124   PC_IS          *pcis = (PC_IS*)pc->data;
125   VecType        impVecType;
126   PetscInt       n_constraints,n_R,old_size;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   if (!pcbddc->ConstraintMatrix) {
131     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
132   }
133   /* get sizes */
134   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
135   n_R = pcis->n-pcbddc->n_actual_vertices;
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   /* local work vectors (try to avoid unneeded work)*/
138   /* R nodes */
139   old_size = -1;
140   if (pcbddc->vec1_R) {
141     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
142   }
143   if (n_R != old_size) {
144     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
149     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
150   }
151   /* local primal dofs */
152   old_size = -1;
153   if (pcbddc->vec1_P) {
154     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
155   }
156   if (pcbddc->local_primal_size != old_size) {
157     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
158     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
159     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
160     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
161   }
162   /* local explicit constraints */
163   old_size = -1;
164   if (pcbddc->vec1_C) {
165     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
166   }
167   if (n_constraints && n_constraints != old_size) {
168     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
169     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
170     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
171     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "PCBDDCSetUpCorrection"
178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
179 {
180   PetscErrorCode         ierr;
181   /* pointers to pcis and pcbddc */
182   PC_IS*                 pcis = (PC_IS*)pc->data;
183   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
184   /* submatrices of local problem */
185   Mat                    A_RV,A_VR,A_VV;
186   /* working matrices */
187   Mat                    M1,M2,M3,C_CR;
188   /* working vectors */
189   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
190   /* additional working stuff */
191   IS                     is_aux;
192   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
193   const PetscScalar      *array,*row_cmat_values;
194   const PetscInt         *row_cmat_indices,*idx_R_local;
195   PetscInt               *idx_V_B,*auxindices;
196   PetscInt               n_vertices,n_constraints,size_of_constraint;
197   PetscInt               i,j,n_R,n_D,n_B;
198   PetscBool              unsymmetric_check;
199   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
200   MatType                impMatType;
201   /* some shortcuts to scalars */
202   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
203   /* for debugging purposes */
204   PetscReal              *coarsefunctions_errors,*constraints_errors;
205 
206   PetscFunctionBegin;
207   /* get number of vertices (corners plus constraints with change of basis)
208      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
209   n_vertices = pcbddc->n_actual_vertices;
210   n_constraints = pcbddc->local_primal_size-n_vertices;
211   /* Set Non-overlapping dimensions */
212   n_B = pcis->n_B; n_D = pcis->n - n_B;
213   n_R = pcis->n-n_vertices;
214 
215   /* Set types for local objects needed by BDDC precondtioner */
216   impMatType = MATSEQDENSE;
217 
218   /* Allocating some extra storage just to be safe */
219   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
220   for (i=0;i<pcis->n;i++) auxindices[i]=i;
221 
222   /* vertices in boundary numbering */
223   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
224   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
225   if (i != n_vertices) {
226     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
227   }
228 
229   /* Precompute stuffs needed for preprocessing and application of BDDC*/
230   if (n_constraints) {
231     /* see if we can save some allocations */
232     if (pcbddc->local_auxmat2) {
233       PetscInt on_R,on_constraints;
234       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
235       if (on_R != n_R || on_constraints != n_constraints) {
236         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
237         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
238       }
239     }
240     /* work vectors */
241     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
242     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
243     /* auxiliary matrices */
244     if (!pcbddc->local_auxmat2) {
245       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
246       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
247       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
248       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
249     }
250 
251     /* Extract constraints on R nodes: C_{CR}  */
252     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
253     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
254     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
255 
256     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
257     for (i=0;i<n_constraints;i++) {
258       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
259       /* Get row of constraint matrix in R numbering */
260       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
262       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
263       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
264       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
265       /* Solve for row of constraint matrix in R numbering */
266       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
267       /* Set values in local_auxmat2 */
268       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
270       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
271     }
272     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
275 
276     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
277     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
278     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
279     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
280     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
281     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
282     ierr = MatSetUp(M1);CHKERRQ(ierr);
283     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
284     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
285     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
286     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
287     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
288     ierr = MatDestroy(&M2);CHKERRQ(ierr);
289     ierr = MatDestroy(&M3);CHKERRQ(ierr);
290     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
291     if (!pcbddc->local_auxmat1) {
292       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     } else {
294       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
295     }
296   }
297 
298   /* Get submatrices from subdomain matrix */
299   if (n_vertices) {
300     PetscInt ibs,mbs;
301     PetscBool issbaij;
302     Mat newmat;
303 
304     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
305     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
306     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
307     if (ibs != mbs) { /* need to convert to SEQAIJ */
308       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
310       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
311       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
313     } else {
314       /* this is safe */
315       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
316       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
317       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
318         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
319         /* which of the two approaches is faster? */
320         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
322         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
324         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
325       } else {
326         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
327         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
328       }
329     }
330     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
331     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
332     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
333   }
334 
335   /* Matrix of coarse basis functions (local) */
336   if (pcbddc->coarse_phi_B) {
337     PetscInt on_B,on_primal;
338     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
339     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
340       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
341       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
342     }
343   }
344   if (pcbddc->coarse_phi_D) {
345     PetscInt on_D,on_primal;
346     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
347     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
348       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
349       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
350     }
351   }
352   if (!pcbddc->coarse_phi_B) {
353     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
354     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
355     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
356     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
357   }
358   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
359     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
360     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
361     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
362     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
363   }
364 
365   if (pcbddc->dbg_flag) {
366     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
367     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
368     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
369   }
370   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
371   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
372 
373   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
374 
375   /* vertices */
376   for (i=0;i<n_vertices;i++) {
377     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
378     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
379     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
380     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
381     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
382     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
383     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
384     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
385     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
387     if (n_constraints) {
388       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
389       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
390       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
391     }
392     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
393     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
394 
395     /* Set values in coarse basis function and subdomain part of coarse_mat */
396     /* coarse basis functions */
397     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
398     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
399     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
402     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
404     if (pcbddc->switch_static || pcbddc->dbg_flag) {
405       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
409       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410     }
411     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
412     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
414     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
415     if (n_constraints) {
416       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
417       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
418       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     }
420 
421     /* check */
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on local nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       if (n_R) {
427         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
428       }
429       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
430       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
432       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
433       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
434       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
435       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
437       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
438       if (n_constraints) {
439         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
440         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
441         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
442       }
443       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
444       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
445       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
446       /* check saddle point solution */
447       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
448       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
449       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
450       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
451       /* shift by the identity matrix */
452       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
453       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
454       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
455       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
456     }
457   }
458 
459   /* constraints */
460   for (i=0;i<n_constraints;i++) {
461     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
462     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
463     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
464     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
465     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
466     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
467     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
468     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
469     if (n_vertices) {
470       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
471     }
472     /* Set values in coarse basis function and subdomain part of coarse_mat */
473     /* coarse basis functions */
474     j = i+n_vertices; /* don't touch this! */
475     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
476     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
479     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
481     if (pcbddc->switch_static || pcbddc->dbg_flag) {
482       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
483       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
484       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
485       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
487     }
488     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
489     if (n_vertices) {
490       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
491       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
492       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
493     }
494     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
495     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
496     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
497 
498     if (pcbddc->dbg_flag) {
499       /* assemble subdomain vector on nodes */
500       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
502       if (n_R) {
503         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
504       }
505       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
506       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
507       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
508       /* assemble subdomain vector of lagrange multipliers */
509       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
510       if (n_vertices) {
511         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
512         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
513         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
514       }
515       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
516       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
517       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
518       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
521       /* check saddle point solution */
522       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
523       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
524       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
525       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
526       /* shift by the identity matrix */
527       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
528       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
529       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
530       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
531     }
532   }
533   /* call assembling routines for local coarse basis */
534   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536   if (pcbddc->switch_static || pcbddc->dbg_flag) {
537     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
538     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
539   }
540 
541   /* compute other basis functions for non-symmetric problems */
542   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
543   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
544     PetscBool setsym;
545     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
546     if (!setsym) pcbddc->issym = PETSC_FALSE;
547   }
548 
549   if (!pcbddc->issym) {
550     if (!pcbddc->coarse_psi_B) {
551       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
552       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
553       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
554       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
555     }
556     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
557       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
558       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
559       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
560       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
561     }
562     for (i=0;i<pcbddc->local_primal_size;i++) {
563       if (n_constraints) {
564         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
565         for (j=0;j<n_constraints;j++) {
566           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
567         }
568         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
569         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
570       }
571       if (i<n_vertices) {
572         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
573         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
574         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
575         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
576         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
577         if (n_constraints) {
578           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
579         }
580       } else {
581         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
582       }
583       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
584       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
585       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
586       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
587       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
588       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
589       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
590       if (i<n_vertices) {
591         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
592       }
593       if (pcbddc->switch_static || pcbddc->dbg_flag) {
594         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
595         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
596         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
597         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
598         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
599       }
600 
601       if (pcbddc->dbg_flag) {
602         /* assemble subdomain vector on nodes */
603         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
604         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
605         if (n_R) {
606           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
607         }
608         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
609         if (i<n_vertices) {
610           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
611         }
612         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
614         /* assemble subdomain vector of lagrange multipliers */
615         for (j=0;j<pcbddc->local_primal_size;j++) {
616           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
617         }
618         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
619         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
620         /* check saddle point solution */
621         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
622         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
623         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
624         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
625         /* shift by the identity matrix */
626         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
627         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
628         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
629         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
630       }
631     }
632     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
633     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634     if (pcbddc->switch_static || pcbddc->dbg_flag) {
635       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
636       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
637     }
638     unsymmetric_check = PETSC_TRUE;
639   } else { /* take references to already computed coarse basis */
640     unsymmetric_check = PETSC_FALSE;
641     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
642     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
643     if (pcbddc->coarse_phi_D) {
644       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
645       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
646     }
647   }
648   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
649   /* Checking coarse_sub_mat and coarse basis functios */
650   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
651   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
652   if (pcbddc->dbg_flag) {
653     Mat         coarse_sub_mat;
654     Mat         AUXMAT,TM1,TM2,TM3,TM4;
655     Mat         coarse_phi_D,coarse_phi_B;
656     Mat         coarse_psi_D,coarse_psi_B;
657     Mat         A_II,A_BB,A_IB,A_BI;
658     MatType     checkmattype=MATSEQAIJ;
659     PetscReal   real_value;
660 
661     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
662     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
663     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
664     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
665     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
666     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
667     if (unsymmetric_check) {
668       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
669       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
670     }
671     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
672 
673     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
674     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
675     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
676     if (unsymmetric_check) {
677       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
684       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
685       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
686       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
687       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
688       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
689     } else {
690       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
691       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
692       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
693       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
694       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
695       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
696       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
697       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
698     }
699     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
700     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
701     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
702     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
703     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
704     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
705     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
706     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
707     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
708     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
709     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
710     for (i=0;i<pcbddc->local_primal_size;i++) {
711       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
712     }
713     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
714     for (i=0;i<pcbddc->local_primal_size;i++) {
715       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
716     }
717     if (unsymmetric_check) {
718       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
719       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
720         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
721       }
722       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
723       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
724         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
725       }
726     }
727     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
731     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
732     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
733     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
734     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
735     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
736     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
737     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
738     if (unsymmetric_check) {
739       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
740       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
741     }
742     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
743     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
744     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
745     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
746   }
747   /* free memory */
748   if (n_vertices) {
749     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
750     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
751     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
752     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
753     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
754   }
755   if (n_constraints) {
756     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
757     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
758     ierr = MatDestroy(&M1);CHKERRQ(ierr);
759     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
760   }
761   ierr = PetscFree(auxindices);CHKERRQ(ierr);
762   /* get back data */
763   *coarse_submat_vals_n = coarse_submat_vals;
764   PetscFunctionReturn(0);
765 }
766 
767 #undef __FUNCT__
768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
770 {
771   PC_IS*            pcis = (PC_IS*)(pc->data);
772   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
773   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
774   PetscBool         issbaij,isseqaij;
775   /* manage repeated solves */
776   MatReuse          reuse;
777   PetscErrorCode    ierr;
778 
779   PetscFunctionBegin;
780   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
781     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
782   }
783   /* get mat flags */
784   reuse = MAT_INITIAL_MATRIX;
785   if (pc->setupcalled) {
786     if (pc->flag == SAME_NONZERO_PATTERN) {
787       reuse = MAT_REUSE_MATRIX;
788     } else {
789       reuse = MAT_INITIAL_MATRIX;
790     }
791   }
792   if (reuse == MAT_INITIAL_MATRIX) {
793     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
794     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
795     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
796     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
797     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
798   }
799 
800   /* transform local matrices if needed */
801   if (pcbddc->ChangeOfBasisMatrix) {
802     Mat       *change_mat_all;
803     IS        is_local,is_global;
804     PetscBool sorted;
805     PetscInt  *idxs_perm;
806 
807     ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr);
808     ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
809     ierr = ISDestroy(&is_local);CHKERRQ(ierr);
810     ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
811     if (!sorted) {
812       const PetscInt *idxs;
813       PetscInt *idxs_sorted,i;
814 
815       ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr);
816       ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr);
817       for (i=0;i<pcis->n;i++) {
818         idxs_perm[i] = i;
819       }
820       ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
821       ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr);
822       for (i=0;i<pcis->n;i++) {
823         idxs_sorted[i] = idxs[idxs_perm[i]];
824       }
825       ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
826       ierr = ISDestroy(&is_global);CHKERRQ(ierr);
827       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
828     }
829 
830     /* get change of basis on the whole set of local dofs */
831     ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
832 
833     if (!sorted) {
834       Mat      new_mat;
835       IS       is_perm;
836       PetscInt *idxs,i;
837 
838       ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr);
839       for (i=0;i<pcis->n;i++) {
840         idxs[idxs_perm[i]] = i;
841       }
842       ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
843       ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
844       ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
845       ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
846       ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
847       change_mat_all[0] = new_mat;
848       ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
849     }
850 
851     /* check */
852     if (pcbddc->dbg_flag) {
853       Vec       x,x_change;
854       PetscReal error;
855 
856       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
857       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
858       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
859       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
860       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
863       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
864       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
865       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
866       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
867       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
868       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
869       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
870       ierr = VecDestroy(&x);CHKERRQ(ierr);
871       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
872     }
873 
874     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
875     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
876     if (isseqaij) {
877       ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
878     } else {
879       Mat work_mat;
880       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
881       ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
882       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
883     }
884     /*
885     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
886     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
887     */
888     ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
889     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
890   } else {
891     /* without change of basis, the local matrix is unchanged */
892     if (!pcbddc->local_mat) {
893       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
894       pcbddc->local_mat = matis->A;
895     }
896   }
897 
898   /* get submatrices */
899   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
900   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
901   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
902   if (!issbaij) {
903     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
904     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
905   } else {
906     Mat newmat;
907     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
908     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
909     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
910     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
918 {
919   PC_IS*         pcis = (PC_IS*)(pc->data);
920   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
921   IS             is_aux1,is_aux2;
922   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
923   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
924   PetscInt       vbs,bs;
925   PetscBT        bitmask;
926   PetscErrorCode ierr;
927 
928   PetscFunctionBegin;
929   /*
930     No need to setup local scatters if
931       - primal space is unchanged
932         AND
933       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
934         AND
935       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
936   */
937   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
938     PetscFunctionReturn(0);
939   }
940   /* destroy old objects */
941   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
942   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
943   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
944   /* Set Non-overlapping dimensions */
945   n_B = pcis->n_B; n_D = pcis->n - n_B;
946   n_vertices = pcbddc->n_actual_vertices;
947   /* create auxiliary bitmask */
948   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
949   for (i=0;i<n_vertices;i++) {
950     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
951   }
952 
953   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
954   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
955   for (i=0, n_R=0; i<pcis->n; i++) {
956     if (!PetscBTLookup(bitmask,i)) {
957       idx_R_local[n_R] = i;
958       n_R++;
959     }
960   }
961 
962   /* Block code */
963   vbs = 1;
964   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
965   if (bs>1 && !(n_vertices%bs)) {
966     PetscBool is_blocked = PETSC_TRUE;
967     PetscInt  *vary;
968     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
969     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
970     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
971     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
972     for (i=0; i<n_vertices; i++) {
973       if (vary[i]!=0 && vary[i]!=bs) {
974         is_blocked = PETSC_FALSE;
975         break;
976       }
977     }
978     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
979       vbs = bs;
980       for (i=0;i<n_R/vbs;i++) {
981         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
982       }
983     }
984     ierr = PetscFree(vary);CHKERRQ(ierr);
985   }
986   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
987   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
988 
989   /* print some info if requested */
990   if (pcbddc->dbg_flag) {
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
993     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
995     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
996     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
999   }
1000 
1001   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1002   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1003   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
1004   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
1005   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1006   for (i=0; i<n_D; i++) {
1007     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1008   }
1009   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1010   for (i=0, j=0; i<n_R; i++) {
1011     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1012       aux_array1[j++] = i;
1013     }
1014   }
1015   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1016   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1017   for (i=0, j=0; i<n_B; i++) {
1018     if (!PetscBTLookup(bitmask,is_indices[i])) {
1019       aux_array2[j++] = i;
1020     }
1021   }
1022   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1023   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1024   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1025   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1026   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1027 
1028   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1029     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1030     for (i=0, j=0; i<n_R; i++) {
1031       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1032         aux_array1[j++] = i;
1033       }
1034     }
1035     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1036     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1038   }
1039   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1040   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1048 {
1049   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1050   PC_IS          *pcis = (PC_IS*)pc->data;
1051   PC             pc_temp;
1052   Mat            A_RR;
1053   MatReuse       reuse;
1054   PetscScalar    m_one = -1.0;
1055   PetscReal      value;
1056   PetscInt       n_D,n_R,ibs,mbs;
1057   PetscBool      use_exact,use_exact_reduced,issbaij;
1058   PetscErrorCode ierr;
1059   /* prefixes stuff */
1060   char           dir_prefix[256],neu_prefix[256],str_level[16];
1061   size_t         len;
1062 
1063   PetscFunctionBegin;
1064 
1065   /* compute prefixes */
1066   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1067   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1068   if (!pcbddc->current_level) {
1069     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1070     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1071     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1072     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1073   } else {
1074     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1075     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1076     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1077     len -= 15; /* remove "pc_bddc_coarse_" */
1078     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1079     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1080     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1081     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1082     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1083     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1084     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1085     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1086   }
1087 
1088   /* DIRICHLET PROBLEM */
1089   /* Matrix for Dirichlet problem is pcis->A_II */
1090   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1091   if (!pcbddc->ksp_D) { /* create object if not yet build */
1092     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1093     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1094     /* default */
1095     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1096     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1097     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1098     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1099     if (issbaij) {
1100       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1101     } else {
1102       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1103     }
1104     /* Allow user's customization */
1105     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1106     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1107   }
1108   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1109   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1110   if (!n_D) {
1111     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1112     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1113   }
1114   /* Set Up KSP for Dirichlet problem of BDDC */
1115   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1116   /* set ksp_D into pcis data */
1117   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1118   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1119   pcis->ksp_D = pcbddc->ksp_D;
1120 
1121   /* NEUMANN PROBLEM */
1122   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1123   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1124   if (pcbddc->ksp_R) { /* already created ksp */
1125     PetscInt nn_R;
1126     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1127     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1128     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1129     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1130       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1131       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1132       reuse = MAT_INITIAL_MATRIX;
1133     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1134       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1135         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1136         reuse = MAT_INITIAL_MATRIX;
1137       } else { /* safe to reuse the matrix */
1138         reuse = MAT_REUSE_MATRIX;
1139       }
1140     }
1141     /* last check */
1142     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1143       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1144       reuse = MAT_INITIAL_MATRIX;
1145     }
1146   } else { /* first time, so we need to create the matrix */
1147     reuse = MAT_INITIAL_MATRIX;
1148   }
1149   /* extract A_RR */
1150   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1151   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1152   if (ibs != mbs) {
1153     Mat newmat;
1154     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1155     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1156     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1157   } else {
1158     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1159   }
1160   if (!pcbddc->ksp_R) { /* create object if not present */
1161     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1162     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1163     /* default */
1164     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1165     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1166     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1167     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1168     if (issbaij) {
1169       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1170     } else {
1171       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1172     }
1173     /* Allow user's customization */
1174     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1175     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1176   }
1177   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1178   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1179   if (!n_R) {
1180     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1181     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1182   }
1183   /* Set Up KSP for Neumann problem of BDDC */
1184   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1185 
1186   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1187   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1188     /* Dirichlet */
1189     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1190     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1191     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1192     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1193     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1194     /* need to be adapted? */
1195     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1196     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1197     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1198     /* print info */
1199     if (pcbddc->dbg_flag) {
1200       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1201       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1202       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1203       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1204       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1205       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1206     }
1207     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1208       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1209     }
1210 
1211     /* Neumann */
1212     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1213     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1214     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1215     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1216     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1217     /* need to be adapted? */
1218     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1219     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1220     /* print info */
1221     if (pcbddc->dbg_flag) {
1222       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1223       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1224     }
1225     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1226       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1227     }
1228   }
1229   /* free Neumann problem's matrix */
1230   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1236 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1237 {
1238   PetscErrorCode ierr;
1239   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1240 
1241   PetscFunctionBegin;
1242   if (applytranspose) {
1243     if (pcbddc->local_auxmat1) {
1244       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1245       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1246     }
1247     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1248   } else {
1249     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1250     if (pcbddc->local_auxmat1) {
1251       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1252       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1259 #undef __FUNCT__
1260 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1261 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1262 {
1263   PetscErrorCode ierr;
1264   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1265   PC_IS*            pcis = (PC_IS*)  (pc->data);
1266   const PetscScalar zero = 0.0;
1267 
1268   PetscFunctionBegin;
1269   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1270   if (applytranspose) {
1271     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1272     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1273   } else {
1274     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1275     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1276   }
1277   /* start communications from local primal nodes to rhs of coarse solver */
1278   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1279   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281 
1282   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1283   /* TODO remove null space when doing multilevel */
1284   if (pcbddc->coarse_ksp) {
1285     if (applytranspose) {
1286       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1287     } else {
1288       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1289     }
1290   }
1291 
1292   /* Local solution on R nodes */
1293   if (pcis->n) {
1294     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1295     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1296     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1297     if (pcbddc->switch_static) {
1298       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1299       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1300     }
1301     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1302     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1303     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1304     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305     if (pcbddc->switch_static) {
1306       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1307       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1308     }
1309   }
1310 
1311   /* communications from coarse sol to local primal nodes */
1312   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1313   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1314 
1315   /* Sum contributions from two levels */
1316   if (applytranspose) {
1317     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1318     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1319   } else {
1320     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1321     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1327 #undef __FUNCT__
1328 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1329 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1330 {
1331   PetscErrorCode ierr;
1332   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1333   PetscScalar    *array,*array2;
1334   Vec            from,to;
1335 
1336   PetscFunctionBegin;
1337   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1338     from = pcbddc->coarse_vec;
1339     to = pcbddc->vec1_P;
1340     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1341       Vec tvec;
1342       PetscInt lsize;
1343       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1344       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1345       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1346       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1347       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1348       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1349       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1350     }
1351   } else { /* from local to global -> put data in coarse right hand side */
1352     from = pcbddc->vec1_P;
1353     to = pcbddc->coarse_vec;
1354   }
1355   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1361 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1362 {
1363   PetscErrorCode ierr;
1364   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1365   PetscScalar    *array,*array2;
1366   Vec            from,to;
1367 
1368   PetscFunctionBegin;
1369   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1370     from = pcbddc->coarse_vec;
1371     to = pcbddc->vec1_P;
1372   } else { /* from local to global -> put data in coarse right hand side */
1373     from = pcbddc->vec1_P;
1374     to = pcbddc->coarse_vec;
1375   }
1376   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1377   if (smode == SCATTER_FORWARD) {
1378     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1379       Vec tvec;
1380       PetscInt lsize;
1381       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1382       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1383       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1384       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1385       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1386       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1387       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1388     }
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /* uncomment for testing purposes */
1394 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1397 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1398 {
1399   PetscErrorCode    ierr;
1400   PC_IS*            pcis = (PC_IS*)(pc->data);
1401   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1402   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1403   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1404   MatType           impMatType=MATSEQAIJ;
1405   /* one and zero */
1406   PetscScalar       one=1.0,zero=0.0;
1407   /* space to store constraints and their local indices */
1408   PetscScalar       *temp_quadrature_constraint;
1409   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1410   /* iterators */
1411   PetscInt          i,j,k,total_counts,temp_start_ptr;
1412   /* stuff to store connected components stored in pcbddc->mat_graph */
1413   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1414   PetscInt          n_ISForFaces,n_ISForEdges;
1415   /* near null space stuff */
1416   MatNullSpace      nearnullsp;
1417   const Vec         *nearnullvecs;
1418   Vec               *localnearnullsp;
1419   PetscBool         nnsp_has_cnst;
1420   PetscInt          nnsp_size;
1421   PetscScalar       *array;
1422   /* BLAS integers */
1423   PetscBLASInt      lwork,lierr;
1424   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1425   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1426   /* LAPACK working arrays for SVD or POD */
1427   PetscBool         skip_lapack;
1428   PetscScalar       *work;
1429   PetscReal         *singular_vals;
1430 #if defined(PETSC_USE_COMPLEX)
1431   PetscReal         *rwork;
1432 #endif
1433 #if defined(PETSC_MISSING_LAPACK_GESVD)
1434   PetscBLASInt      Blas_one_2=1;
1435   PetscScalar       *temp_basis,*correlation_mat;
1436 #else
1437   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1438   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1439 #endif
1440   /* reuse */
1441   PetscInt          olocal_primal_size;
1442   PetscInt          *oprimal_indices_local_idxs;
1443   /* change of basis */
1444   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1445   PetscBool         boolforchange,qr_needed;
1446   PetscBT           touched,change_basis,qr_needed_idx;
1447   /* auxiliary stuff */
1448   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1449   /* some quantities */
1450   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1451   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1452 
1453 
1454   PetscFunctionBegin;
1455   /* Destroy Mat objects computed previously */
1456   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1457   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1458   /* Get index sets for faces, edges and vertices from graph */
1459   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1460   /* free unneeded index sets */
1461   if (!pcbddc->use_vertices) {
1462     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1463   }
1464   if (!pcbddc->use_edges) {
1465     for (i=0;i<n_ISForEdges;i++) {
1466       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1467     }
1468     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1469     n_ISForEdges = 0;
1470   }
1471   if (!pcbddc->use_faces) {
1472     for (i=0;i<n_ISForFaces;i++) {
1473       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1474     }
1475     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1476     n_ISForFaces = 0;
1477   }
1478   /* HACKS (the following two blocks of code) */
1479   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1480     pcbddc->use_change_of_basis = PETSC_TRUE;
1481     if (!ISForEdges) {
1482       pcbddc->use_change_on_faces = PETSC_TRUE;
1483     }
1484   }
1485   if (pcbddc->NullSpace) {
1486     /* use_change_of_basis should be consistent among processors */
1487     PetscBool tbool[2],gbool[2];
1488     tbool [0] = pcbddc->use_change_of_basis;
1489     tbool [1] = pcbddc->use_change_on_faces;
1490     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1491     pcbddc->use_change_of_basis = gbool[0];
1492     pcbddc->use_change_on_faces = gbool[1];
1493   }
1494   /* print some info */
1495   if (pcbddc->dbg_flag) {
1496     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1497     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1498     i = 0;
1499     if (ISForVertices) {
1500       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1501     }
1502     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1503     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1504     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1506   }
1507   /* check if near null space is attached to global mat */
1508   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1509   if (nearnullsp) {
1510     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1511     /* remove any stored info */
1512     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1513     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1514     /* store information for BDDC solver reuse */
1515     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1516     pcbddc->onearnullspace = nearnullsp;
1517     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1518     for (i=0;i<nnsp_size;i++) {
1519       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1520     }
1521   } else { /* if near null space is not provided BDDC uses constants by default */
1522     nnsp_size = 0;
1523     nnsp_has_cnst = PETSC_TRUE;
1524   }
1525   /* get max number of constraints on a single cc */
1526   max_constraints = nnsp_size;
1527   if (nnsp_has_cnst) max_constraints++;
1528 
1529   /*
1530        Evaluate maximum storage size needed by the procedure
1531        - temp_indices will contain start index of each constraint stored as follows
1532        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1533        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1534        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1535                                                                                                                                                          */
1536   total_counts = n_ISForFaces+n_ISForEdges;
1537   total_counts *= max_constraints;
1538   n_vertices = 0;
1539   if (ISForVertices) {
1540     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1541   }
1542   total_counts += n_vertices;
1543   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1544   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1545   total_counts = 0;
1546   max_size_of_constraint = 0;
1547   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1548     if (i<n_ISForEdges) {
1549       used_IS = &ISForEdges[i];
1550     } else {
1551       used_IS = &ISForFaces[i-n_ISForEdges];
1552     }
1553     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1554     total_counts += j;
1555     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1556   }
1557   total_counts *= max_constraints;
1558   total_counts += n_vertices;
1559   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1560   /* get local part of global near null space vectors */
1561   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1562   for (k=0;k<nnsp_size;k++) {
1563     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1564     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1565     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1566   }
1567 
1568   /* whether or not to skip lapack calls */
1569   skip_lapack = PETSC_TRUE;
1570   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1571 
1572   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1573   if (!skip_lapack) {
1574     PetscScalar temp_work;
1575 #if defined(PETSC_MISSING_LAPACK_GESVD)
1576     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1577     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1578     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1579     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1580 #if defined(PETSC_USE_COMPLEX)
1581     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1582 #endif
1583     /* now we evaluate the optimal workspace using query with lwork=-1 */
1584     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1585     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1586     lwork = -1;
1587     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1588 #if !defined(PETSC_USE_COMPLEX)
1589     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1590 #else
1591     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1592 #endif
1593     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1594     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1595 #else /* on missing GESVD */
1596     /* SVD */
1597     PetscInt max_n,min_n;
1598     max_n = max_size_of_constraint;
1599     min_n = max_constraints;
1600     if (max_size_of_constraint < max_constraints) {
1601       min_n = max_size_of_constraint;
1602       max_n = max_constraints;
1603     }
1604     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1605 #if defined(PETSC_USE_COMPLEX)
1606     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1607 #endif
1608     /* now we evaluate the optimal workspace using query with lwork=-1 */
1609     lwork = -1;
1610     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1611     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1612     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1613     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1614 #if !defined(PETSC_USE_COMPLEX)
1615     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1616 #else
1617     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1618 #endif
1619     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1620     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1621 #endif /* on missing GESVD */
1622     /* Allocate optimal workspace */
1623     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1624     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1625   }
1626   /* Now we can loop on constraining sets */
1627   total_counts = 0;
1628   temp_indices[0] = 0;
1629   /* vertices */
1630   if (ISForVertices) {
1631     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1632     if (nnsp_has_cnst) { /* consider all vertices */
1633       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1634       for (i=0;i<n_vertices;i++) {
1635         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1636         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1637         total_counts++;
1638       }
1639     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1640       PetscBool used_vertex;
1641       for (i=0;i<n_vertices;i++) {
1642         used_vertex = PETSC_FALSE;
1643         k = 0;
1644         while (!used_vertex && k<nnsp_size) {
1645           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1646           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1647             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1648             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1649             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1650             total_counts++;
1651             used_vertex = PETSC_TRUE;
1652           }
1653           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1654           k++;
1655         }
1656       }
1657     }
1658     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1659     n_vertices = total_counts;
1660   }
1661 
1662   /* edges and faces */
1663   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1664     if (i<n_ISForEdges) {
1665       used_IS = &ISForEdges[i];
1666       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1667     } else {
1668       used_IS = &ISForFaces[i-n_ISForEdges];
1669       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1670     }
1671     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1672     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1673     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1674     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1675     /* change of basis should not be performed on local periodic nodes */
1676     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1677     if (nnsp_has_cnst) {
1678       PetscScalar quad_value;
1679       temp_constraints++;
1680       if (!pcbddc->use_nnsp_true) {
1681         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1682       } else {
1683         quad_value = 1.0;
1684       }
1685       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1686       for (j=0;j<size_of_constraint;j++) {
1687         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1688       }
1689       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1690       total_counts++;
1691     }
1692     for (k=0;k<nnsp_size;k++) {
1693       PetscReal real_value;
1694       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1695       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1696       for (j=0;j<size_of_constraint;j++) {
1697         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1698       }
1699       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1700       /* check if array is null on the connected component */
1701       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1702       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1703       if (real_value > 0.0) { /* keep indices and values */
1704         temp_constraints++;
1705         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1706         total_counts++;
1707       }
1708     }
1709     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1710     valid_constraints = temp_constraints;
1711     if (!pcbddc->use_nnsp_true && temp_constraints) {
1712       if (temp_constraints == 1) { /* just normalize the constraint */
1713         PetscScalar norm;
1714         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1715         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1716         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1717         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1718       } else { /* perform SVD */
1719         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1720 
1721 #if defined(PETSC_MISSING_LAPACK_GESVD)
1722         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1723            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1724            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1725               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1726               from that computed using LAPACKgesvd
1727            -> This is due to a different computation of eigenvectors in LAPACKheev
1728            -> The quality of the POD-computed basis will be the same */
1729         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1730         /* Store upper triangular part of correlation matrix */
1731         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1732         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1733         for (j=0;j<temp_constraints;j++) {
1734           for (k=0;k<j+1;k++) {
1735             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1736           }
1737         }
1738         /* compute eigenvalues and eigenvectors of correlation matrix */
1739         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1740         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1741 #if !defined(PETSC_USE_COMPLEX)
1742         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1743 #else
1744         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1745 #endif
1746         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1747         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1748         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1749         j=0;
1750         while (j < temp_constraints && singular_vals[j] < tol) j++;
1751         total_counts=total_counts-j;
1752         valid_constraints = temp_constraints-j;
1753         /* scale and copy POD basis into used quadrature memory */
1754         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1755         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1756         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1757         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1758         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1759         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1760         if (j<temp_constraints) {
1761           PetscInt ii;
1762           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1763           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1764           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1765           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1766           for (k=0;k<temp_constraints-j;k++) {
1767             for (ii=0;ii<size_of_constraint;ii++) {
1768               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1769             }
1770           }
1771         }
1772 #else  /* on missing GESVD */
1773         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1774         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1775         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1776         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1777 #if !defined(PETSC_USE_COMPLEX)
1778         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1779 #else
1780         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1781 #endif
1782         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1783         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1784         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1785         k = temp_constraints;
1786         if (k > size_of_constraint) k = size_of_constraint;
1787         j = 0;
1788         while (j < k && singular_vals[k-j-1] < tol) j++;
1789         total_counts = total_counts-temp_constraints+k-j;
1790         valid_constraints = k-j;
1791 #endif /* on missing GESVD */
1792       }
1793     }
1794     /* setting change_of_basis flag is safe now */
1795     if (boolforchange) {
1796       for (j=0;j<valid_constraints;j++) {
1797         PetscBTSet(change_basis,total_counts-j-1);
1798       }
1799     }
1800   }
1801   /* free index sets of faces, edges and vertices */
1802   for (i=0;i<n_ISForFaces;i++) {
1803     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1804   }
1805   if (n_ISForFaces) {
1806     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1807   }
1808   for (i=0;i<n_ISForEdges;i++) {
1809     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1810   }
1811   if (n_ISForEdges) {
1812     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1813   }
1814   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1815   /* map temp_indices_to_constraint in boundary numbering */
1816   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1817   if (i != temp_indices[total_counts]) {
1818     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1819   }
1820 
1821   /* free workspace */
1822   if (!skip_lapack) {
1823     ierr = PetscFree(work);CHKERRQ(ierr);
1824 #if defined(PETSC_USE_COMPLEX)
1825     ierr = PetscFree(rwork);CHKERRQ(ierr);
1826 #endif
1827     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1828 #if defined(PETSC_MISSING_LAPACK_GESVD)
1829     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1830     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1831 #endif
1832   }
1833   for (k=0;k<nnsp_size;k++) {
1834     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1835   }
1836   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1837 
1838   /* set quantities in pcbddc data structure and store previous primal size */
1839   /* n_vertices defines the number of subdomain corners in the primal space */
1840   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1841   olocal_primal_size = pcbddc->local_primal_size;
1842   pcbddc->local_primal_size = total_counts;
1843   pcbddc->n_vertices = n_vertices;
1844   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1845 
1846   /* Create constraint matrix */
1847   /* The constraint matrix is used to compute the l2g map of primal dofs */
1848   /* so we need to set it up properly either with or without change of basis */
1849   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1850   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1851   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1852   /* array to compute a local numbering of constraints : vertices first then constraints */
1853   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1854   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1855   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1856   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1857   /* auxiliary stuff for basis change */
1858   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1859   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1860 
1861   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1862   total_primal_vertices=0;
1863   for (i=0;i<pcbddc->local_primal_size;i++) {
1864     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1865     if (size_of_constraint == 1) {
1866       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1867       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1868       aux_primal_minloc[total_primal_vertices]=0;
1869       total_primal_vertices++;
1870     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1871       PetscInt min_loc,min_index;
1872       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1873       /* find first untouched local node */
1874       k = 0;
1875       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1876       min_index = global_indices[k];
1877       min_loc = k;
1878       /* search the minimum among global nodes already untouched on the cc */
1879       for (k=1;k<size_of_constraint;k++) {
1880         /* there can be more than one constraint on a single connected component */
1881         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1882           min_index = global_indices[k];
1883           min_loc = k;
1884         }
1885       }
1886       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1887       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1888       aux_primal_minloc[total_primal_vertices]=min_loc;
1889       total_primal_vertices++;
1890     }
1891   }
1892   /* determine if a QR strategy is needed for change of basis */
1893   qr_needed = PETSC_FALSE;
1894   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1895   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1896     if (PetscBTLookup(change_basis,i)) {
1897       if (!pcbddc->use_qr_single) {
1898         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1899         j = 0;
1900         for (k=0;k<size_of_constraint;k++) {
1901           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1902             j++;
1903           }
1904         }
1905         /* found more than one primal dof on the cc */
1906         if (j > 1) {
1907           PetscBTSet(qr_needed_idx,i);
1908           qr_needed = PETSC_TRUE;
1909         }
1910       } else {
1911         PetscBTSet(qr_needed_idx,i);
1912         qr_needed = PETSC_TRUE;
1913       }
1914     }
1915   }
1916   /* free workspace */
1917   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1918 
1919   /* permute indices in order to have a sorted set of vertices */
1920   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1921 
1922   /* nonzero structure of constraint matrix */
1923   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1924   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1925   j=total_primal_vertices;
1926   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1927     if (!PetscBTLookup(change_basis,i)) {
1928       nnz[j]=temp_indices[i+1]-temp_indices[i];
1929       j++;
1930     }
1931   }
1932   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1933   ierr = PetscFree(nnz);CHKERRQ(ierr);
1934   /* set values in constraint matrix */
1935   for (i=0;i<total_primal_vertices;i++) {
1936     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1937   }
1938   total_counts = total_primal_vertices;
1939   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1940     if (!PetscBTLookup(change_basis,i)) {
1941       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1942       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1943       total_counts++;
1944     }
1945   }
1946   /* assembling */
1947   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1948   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1949   /*
1950   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1951   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1952   */
1953   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1954   if (pcbddc->use_change_of_basis) {
1955     /* dual and primal dofs on a single cc */
1956     PetscInt     dual_dofs,primal_dofs;
1957     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1958     PetscInt     primal_counter;
1959     /* working stuff for GEQRF */
1960     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1961     PetscBLASInt lqr_work;
1962     /* working stuff for UNGQR */
1963     PetscScalar  *gqr_work,lgqr_work_t;
1964     PetscBLASInt lgqr_work;
1965     /* working stuff for TRTRS */
1966     PetscScalar  *trs_rhs;
1967     PetscBLASInt Blas_NRHS;
1968     /* pointers for values insertion into change of basis matrix */
1969     PetscInt     *start_rows,*start_cols;
1970     PetscScalar  *start_vals;
1971     /* working stuff for values insertion */
1972     PetscBT      is_primal;
1973     /* matrix sizes */
1974     PetscInt     global_size,local_size;
1975     /* work array for nonzeros */
1976     PetscScalar  *nnz_array;
1977     /* temporary change of basis */
1978     Mat          localChangeOfBasisMatrix;
1979     /* auxiliary work for global change of basis */
1980     Vec          nnz_vec;
1981     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
1982     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
1983     PetscScalar  *vals;
1984     PetscBool    done;
1985 
1986     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
1987     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
1988     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1989     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1990 
1991     /* nonzeros for local mat */
1992     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1993     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1994     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1995       if (PetscBTLookup(change_basis,i)) {
1996         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1997         if (PetscBTLookup(qr_needed_idx,i)) {
1998           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1999         } else {
2000           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2001           /* get local primal index on the cc */
2002           j = 0;
2003           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2004           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2005         }
2006       }
2007     }
2008     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2009     /* Set initial identity in the matrix */
2010     for (i=0;i<pcis->n_B;i++) {
2011       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2012     }
2013 
2014     if (pcbddc->dbg_flag) {
2015       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2016       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2017     }
2018 
2019 
2020     /* Now we loop on the constraints which need a change of basis */
2021     /*
2022        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2023        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2024 
2025        Basic blocks of change of basis matrix T computed by
2026 
2027           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2028 
2029             | 1        0   ...        0         s_1/S |
2030             | 0        1   ...        0         s_2/S |
2031             |              ...                        |
2032             | 0        ...            1     s_{n-1}/S |
2033             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2034 
2035             with S = \sum_{i=1}^n s_i^2
2036             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2037                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2038 
2039           - QR decomposition of constraints otherwise
2040     */
2041     if (qr_needed) {
2042       /* space to store Q */
2043       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
2044       /* first we issue queries for optimal work */
2045       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2046       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2047       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2048       lqr_work = -1;
2049       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2050       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2051       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2052       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2053       lgqr_work = -1;
2054       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2055       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2056       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2057       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2058       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2059       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2060       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2061       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2062       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2063       /* array to store scaling factors for reflectors */
2064       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2065       /* array to store rhs and solution of triangular solver */
2066       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2067       /* allocating workspace for check */
2068       if (pcbddc->dbg_flag) {
2069         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2070       }
2071     }
2072     /* array to store whether a node is primal or not */
2073     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2074     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2075     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2076     if (i != total_primal_vertices) {
2077       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2078     }
2079     for (i=0;i<total_primal_vertices;i++) {
2080       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2081     }
2082     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2083 
2084     /* loop on constraints and see whether or not they need a change of basis and compute it */
2085     /* -> using implicit ordering contained in temp_indices data */
2086     total_counts = pcbddc->n_vertices;
2087     primal_counter = total_counts;
2088     while (total_counts<pcbddc->local_primal_size) {
2089       primal_dofs = 1;
2090       if (PetscBTLookup(change_basis,total_counts)) {
2091         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2092         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2093           primal_dofs++;
2094         }
2095         /* get constraint info */
2096         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2097         dual_dofs = size_of_constraint-primal_dofs;
2098 
2099         if (pcbddc->dbg_flag) {
2100           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2101         }
2102 
2103         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2104 
2105           /* copy quadrature constraints for change of basis check */
2106           if (pcbddc->dbg_flag) {
2107             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2108           }
2109           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2110           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2111 
2112           /* compute QR decomposition of constraints */
2113           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2114           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2115           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2116           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2117           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2118           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2119           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2120 
2121           /* explictly compute R^-T */
2122           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2123           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2124           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2125           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2126           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2127           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2128           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2129           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2130           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2131           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2132 
2133           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2134           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2136           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2137           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2138           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2139           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2140           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2141           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2142 
2143           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2144              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2145              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2146           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2147           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2148           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2149           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2150           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2151           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2152           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2153           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2154           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2155           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2156 
2157           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2158           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2159           /* insert cols for primal dofs */
2160           for (j=0;j<primal_dofs;j++) {
2161             start_vals = &qr_basis[j*size_of_constraint];
2162             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2163             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2164           }
2165           /* insert cols for dual dofs */
2166           for (j=0,k=0;j<dual_dofs;k++) {
2167             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2168               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2169               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2170               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2171               j++;
2172             }
2173           }
2174 
2175           /* check change of basis */
2176           if (pcbddc->dbg_flag) {
2177             PetscInt   ii,jj;
2178             PetscBool valid_qr=PETSC_TRUE;
2179             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2180             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2181             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2182             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2183             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2184             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2185             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2186             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2187             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2188             for (jj=0;jj<size_of_constraint;jj++) {
2189               for (ii=0;ii<primal_dofs;ii++) {
2190                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2191                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2192               }
2193             }
2194             if (!valid_qr) {
2195               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2196               for (jj=0;jj<size_of_constraint;jj++) {
2197                 for (ii=0;ii<primal_dofs;ii++) {
2198                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2199                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2200                   }
2201                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2202                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2203                   }
2204                 }
2205               }
2206             } else {
2207               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2208             }
2209           }
2210         } else { /* simple transformation block */
2211           PetscInt    row,col;
2212           PetscScalar val,norm;
2213 
2214           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2215           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2216           for (j=0;j<size_of_constraint;j++) {
2217             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2218             if (!PetscBTLookup(is_primal,row)) {
2219               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2220               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2221               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2222             } else {
2223               for (k=0;k<size_of_constraint;k++) {
2224                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2225                 if (row != col) {
2226                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2227                 } else {
2228                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2229                 }
2230                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2231               }
2232             }
2233           }
2234           if (pcbddc->dbg_flag) {
2235             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2236           }
2237         }
2238         /* increment primal counter */
2239         primal_counter += primal_dofs;
2240       } else {
2241         if (pcbddc->dbg_flag) {
2242           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2243         }
2244       }
2245       /* increment constraint counter total_counts */
2246       total_counts += primal_dofs;
2247     }
2248 
2249     /* free workspace */
2250     if (qr_needed) {
2251       if (pcbddc->dbg_flag) {
2252         ierr = PetscFree(work);CHKERRQ(ierr);
2253       }
2254       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2255       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2256       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2257       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2258       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2259     }
2260     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2261     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2262     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2263 
2264     /* assembling of global change of variable */
2265     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2266     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2267     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2268     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2269     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2270     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2271 
2272     /* nonzeros (overestimated) */
2273     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2274     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2275     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2276     for (i=0;i<pcis->n;i++) {
2277       nnz_array[i] = 1.0;
2278       idxs_all[i] = i;
2279     }
2280     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2281     for (i=0;i<pcis->n_B;i++) {
2282       nnz_array[idxs_B[i]] = nnz[i];
2283     }
2284     if (pcis->n) {
2285       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2286     }
2287     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2288     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2289     ierr = PetscFree(nnz);CHKERRQ(ierr);
2290     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2291     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2292     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2293     for (i=0;i<local_size;i++) {
2294       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2295       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2296     }
2297     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2298     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2299     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2300     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2301 
2302     /* Set identity on dirichlet dofs */
2303     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2304     for (i=0;i<pcis->n-pcis->n_B;i++) {
2305       PetscScalar one=1.0;
2306       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2307     }
2308     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2309 
2310     /* Set values at interface dofs */
2311     done = PETSC_TRUE;
2312     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2313     if (!done) {
2314       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2315     }
2316     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2317     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2318     ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2319     for (i=0;i<nvtxs;i++) {
2320       PetscInt    row,*cols,ncols;
2321       PetscScalar *mat_vals;
2322 
2323       row = idxs_B[i];
2324       ncols = xadj[i+1]-xadj[i];
2325       cols = idxs_mapped+xadj[i];
2326       mat_vals = vals+xadj[i];
2327       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2328     }
2329     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2330     if (!done) {
2331       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2332     }
2333     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2334     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2335     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2336     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2337     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338 
2339     /* check */
2340     if (pcbddc->dbg_flag) {
2341       PetscReal error;
2342       Vec       x,x_change;
2343 
2344       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2345       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2346       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2347       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2348       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2349       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2350       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2351       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2352       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2353       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2354       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2355       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2356       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2357       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2358       ierr = VecDestroy(&x);CHKERRQ(ierr);
2359       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2360     }
2361     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2362   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2363     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2364     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2365   }
2366 
2367   /* set up change of basis context */
2368   if (pcbddc->ChangeOfBasisMatrix) {
2369     PCBDDCChange_ctx change_ctx;
2370 
2371     if (!pcbddc->new_global_mat) {
2372       PetscInt global_size,local_size;
2373 
2374       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2375       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2376       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2377       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2378       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2379       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2380       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2381       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2382       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2383     } else {
2384       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2385       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2386       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2387     }
2388     if (!pcbddc->user_ChangeOfBasisMatrix) {
2389       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2390       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2391     } else {
2392       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2393       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2394     }
2395     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2396     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2397     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2398     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2399   }
2400 
2401   /* get indices in local ordering for vertices and constraints */
2402   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2403     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2404     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2405   }
2406   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2407   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2408   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2409   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2410   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2411   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2412   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2413   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2414   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2415   /* set quantities in PCBDDC data struct */
2416   pcbddc->n_actual_vertices = i;
2417   /* check if a new primal space has been introduced */
2418   pcbddc->new_primal_space_local = PETSC_TRUE;
2419   if (olocal_primal_size == pcbddc->local_primal_size) {
2420     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2421     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2422     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2423   }
2424   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2425   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2426 
2427   /* flush dbg viewer */
2428   if (pcbddc->dbg_flag) {
2429     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2430   }
2431 
2432   /* free workspace */
2433   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2434   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2435   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2436   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2437   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2438   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2444 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2445 {
2446   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2447   PC_IS       *pcis = (PC_IS*)pc->data;
2448   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2449   PetscInt    ierr,i,vertex_size;
2450   PetscViewer viewer=pcbddc->dbg_viewer;
2451 
2452   PetscFunctionBegin;
2453   /* Reset previously computed graph */
2454   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2455   /* Init local Graph struct */
2456   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2457 
2458   /* Check validity of the csr graph passed in by the user */
2459   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2460     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2461   }
2462 
2463   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2464   if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2465     Mat mat_adj;
2466     const PetscInt *xadj,*adjncy;
2467     PetscBool flg_row=PETSC_TRUE;
2468 
2469     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2470     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2471     if (!flg_row) {
2472       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2473     }
2474     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2475     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2476     if (!flg_row) {
2477       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2478     }
2479     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2480     pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2481   }
2482 
2483   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2484   vertex_size = 1;
2485   if (pcbddc->user_provided_isfordofs) {
2486     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2487       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2488       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2489         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2490         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2491       }
2492       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2493       pcbddc->n_ISForDofs = 0;
2494       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2495     }
2496     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2497     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2498   } else {
2499     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2500       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2501       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2502       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2503         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2504       }
2505     }
2506   }
2507 
2508   /* Setup of Graph */
2509   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2510     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2511   }
2512   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2513     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2514   }
2515   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2516 
2517   /* Graph's connected components analysis */
2518   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2519 
2520   /* print some info to stdout */
2521   if (pcbddc->dbg_flag) {
2522     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2523   }
2524 
2525   /* mark topography has done */
2526   pcbddc->recompute_topography = PETSC_FALSE;
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 #undef __FUNCT__
2531 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2532 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2533 {
2534   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2535   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2536   PetscErrorCode ierr;
2537 
2538   PetscFunctionBegin;
2539   n = 0;
2540   vertices = 0;
2541   if (pcbddc->ConstraintMatrix) {
2542     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2543     for (i=0;i<local_primal_size;i++) {
2544       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2545       if (size_of_constraint == 1) n++;
2546       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2547     }
2548     if (vertices_idx) {
2549       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2550       n = 0;
2551       for (i=0;i<local_primal_size;i++) {
2552         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2553         if (size_of_constraint == 1) {
2554           vertices[n++]=row_cmat_indices[0];
2555         }
2556         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2557       }
2558     }
2559   }
2560   *n_vertices = n;
2561   if (vertices_idx) *vertices_idx = vertices;
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2567 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2568 {
2569   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2570   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2571   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2572   PetscBT        touched;
2573   PetscErrorCode ierr;
2574 
2575     /* This function assumes that the number of local constraints per connected component
2576        is not greater than the number of nodes defined for the connected component
2577        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2578   PetscFunctionBegin;
2579   n = 0;
2580   constraints_index = 0;
2581   if (pcbddc->ConstraintMatrix) {
2582     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2583     max_size_of_constraint = 0;
2584     for (i=0;i<local_primal_size;i++) {
2585       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2586       if (size_of_constraint > 1) {
2587         n++;
2588       }
2589       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2590       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2591     }
2592     if (constraints_idx) {
2593       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2594       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2595       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2596       n = 0;
2597       for (i=0;i<local_primal_size;i++) {
2598         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2599         if (size_of_constraint > 1) {
2600           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2601           /* find first untouched local node */
2602           j = 0;
2603           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2604           min_index = row_cmat_global_indices[j];
2605           min_loc = j;
2606           /* search the minimum among nodes not yet touched on the connected component
2607              since there can be more than one constraint on a single cc */
2608           for (j=1;j<size_of_constraint;j++) {
2609             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2610               min_index = row_cmat_global_indices[j];
2611               min_loc = j;
2612             }
2613           }
2614           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2615           constraints_index[n++] = row_cmat_indices[min_loc];
2616         }
2617         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2618       }
2619       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2620       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2621     }
2622   }
2623   *n_constraints = n;
2624   if (constraints_idx) *constraints_idx = constraints_index;
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 #undef __FUNCT__
2629 #define __FUNCT__ "PCBDDCSubsetNumbering"
2630 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2631 {
2632   Vec            local_vec,global_vec;
2633   IS             seqis,paris;
2634   VecScatter     scatter_ctx;
2635   PetscScalar    *array;
2636   PetscInt       *temp_global_dofs;
2637   PetscScalar    globalsum;
2638   PetscInt       i,j,s;
2639   PetscInt       nlocals,first_index,old_index,max_local;
2640   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2641   PetscMPIInt    *dof_sizes,*dof_displs;
2642   PetscBool      first_found;
2643   PetscErrorCode ierr;
2644 
2645   PetscFunctionBegin;
2646   /* mpi buffers */
2647   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2648   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2649   j = ( !rank_prec_comm ? size_prec_comm : 0);
2650   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2651   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2652   /* get maximum size of subset */
2653   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2654   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2655   max_local = 0;
2656   for (i=0;i<n_local_dofs;i++) {
2657     if (max_local < temp_global_dofs[i] ) {
2658       max_local = temp_global_dofs[i];
2659     }
2660   }
2661   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2662   max_global++;
2663   max_local = 0;
2664   for (i=0;i<n_local_dofs;i++) {
2665     if (max_local < local_dofs[i] ) {
2666       max_local = local_dofs[i];
2667     }
2668   }
2669   max_local++;
2670   /* allocate workspace */
2671   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2672   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2673   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2674   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2675   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2676   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2677   /* create scatter */
2678   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2679   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2680   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2681   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2682   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2683   /* init array */
2684   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2685   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2686   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2687   if (local_dofs_mult) {
2688     for (i=0;i<n_local_dofs;i++) {
2689       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2690     }
2691   } else {
2692     for (i=0;i<n_local_dofs;i++) {
2693       array[local_dofs[i]]=1.0;
2694     }
2695   }
2696   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2697   /* scatter into global vec and get total number of global dofs */
2698   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2699   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2700   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2701   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2702   /* Fill global_vec with cumulative function for global numbering */
2703   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2704   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2705   nlocals = 0;
2706   first_index = -1;
2707   first_found = PETSC_FALSE;
2708   for (i=0;i<s;i++) {
2709     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2710       first_found = PETSC_TRUE;
2711       first_index = i;
2712     }
2713     nlocals += (PetscInt)PetscRealPart(array[i]);
2714   }
2715   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2716   if (!rank_prec_comm) {
2717     dof_displs[0]=0;
2718     for (i=1;i<size_prec_comm;i++) {
2719       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2720     }
2721   }
2722   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2723   if (first_found) {
2724     array[first_index] += (PetscScalar)nlocals;
2725     old_index = first_index;
2726     for (i=first_index+1;i<s;i++) {
2727       if (PetscRealPart(array[i]) > 0.1) {
2728         array[i] += array[old_index];
2729         old_index = i;
2730       }
2731     }
2732   }
2733   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2734   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2735   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2736   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2737   /* get global ordering of local dofs */
2738   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2739   if (local_dofs_mult) {
2740     for (i=0;i<n_local_dofs;i++) {
2741       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2742     }
2743   } else {
2744     for (i=0;i<n_local_dofs;i++) {
2745       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2746     }
2747   }
2748   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2749   /* free workspace */
2750   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2751   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2752   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2753   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2754   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2755   /* return pointer to global ordering of local dofs */
2756   *global_numbering_subset = temp_global_dofs;
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 #undef __FUNCT__
2761 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2762 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2763 {
2764   PetscInt       i,j;
2765   PetscScalar    *alphas;
2766   PetscErrorCode ierr;
2767 
2768   PetscFunctionBegin;
2769   /* this implements stabilized Gram-Schmidt */
2770   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2771   for (i=0;i<n;i++) {
2772     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2773     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2774     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2775   }
2776   ierr = PetscFree(alphas);CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatISGetSubassemblingPattern"
2782 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2783 {
2784   Mat             subdomain_adj;
2785   IS              new_ranks,ranks_send_to;
2786   MatPartitioning partitioner;
2787   Mat_IS          *matis;
2788   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2789   PetscInt        prank;
2790   PetscMPIInt     size,rank,color;
2791   PetscInt        *xadj,*adjncy,*oldranks;
2792   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2793   PetscInt        i,local_size,threshold=0;
2794   PetscErrorCode  ierr;
2795   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2796   PetscSubcomm    subcomm;
2797 
2798   PetscFunctionBegin;
2799   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2800   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2801   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2802 
2803   /* Get info on mapping */
2804   matis = (Mat_IS*)(mat->data);
2805   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2806   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2807 
2808   /* build local CSR graph of subdomains' connectivity */
2809   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2810   xadj[0] = 0;
2811   xadj[1] = PetscMax(n_neighs-1,0);
2812   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2813   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2814 
2815   if (threshold) {
2816     PetscInt xadj_count = 0;
2817     for (i=1;i<n_neighs;i++) {
2818       if (n_shared[i] > threshold) {
2819         adjncy[xadj_count] = neighs[i];
2820         adjncy_wgt[xadj_count] = n_shared[i];
2821         xadj_count++;
2822       }
2823     }
2824     xadj[1] = xadj_count;
2825   } else {
2826     if (xadj[1]) {
2827       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2828       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2829     }
2830   }
2831   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2832   if (use_square) {
2833     for (i=0;i<xadj[1];i++) {
2834       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2835     }
2836   }
2837   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2838 
2839   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2840 
2841   /*
2842     Restrict work on active processes only.
2843   */
2844   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2845   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2846   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2847   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2848   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2849   if (color) {
2850     ierr = PetscFree(xadj);CHKERRQ(ierr);
2851     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2852     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2853   } else {
2854     PetscInt coarsening_ratio;
2855     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2856     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2857     prank = rank;
2858     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2859     /*
2860     for (i=0;i<size;i++) {
2861       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2862     }
2863     */
2864     for (i=0;i<xadj[1];i++) {
2865       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2866     }
2867     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2868     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2869     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2870 
2871     /* Partition */
2872     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2873     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2874     if (use_vwgt) {
2875       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
2876       v_wgt[0] = local_size;
2877       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2878     }
2879     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2880     coarsening_ratio = size/n_subdomains;
2881     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2882     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2883     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2884     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2885 
2886     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2887     if (contiguous) {
2888       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2889     } else {
2890       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2891     }
2892     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2893     /* clean up */
2894     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2895     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2896     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2897     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2898   }
2899   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2900 
2901   /* assemble parallel IS for sends */
2902   i = 1;
2903   if (color) i=0;
2904   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2905 
2906   /* get back IS */
2907   *is_sends = ranks_send_to;
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2912 
2913 #undef __FUNCT__
2914 #define __FUNCT__ "MatISSubassemble"
2915 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2916 {
2917   Mat                    local_mat;
2918   Mat_IS                 *matis;
2919   IS                     is_sends_internal;
2920   PetscInt               rows,cols,new_local_rows;
2921   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2922   PetscBool              ismatis,isdense,newisdense,destroy_mat;
2923   ISLocalToGlobalMapping l2gmap;
2924   PetscInt*              l2gmap_indices;
2925   const PetscInt*        is_indices;
2926   MatType                new_local_type;
2927   /* buffers */
2928   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2929   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2930   PetscInt               *recv_buffer_idxs_local;
2931   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2932   /* MPI */
2933   MPI_Comm               comm,comm_n;
2934   PetscSubcomm           subcomm;
2935   PetscMPIInt            n_sends,n_recvs,commsize;
2936   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2937   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2938   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2939   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2940   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2941   PetscErrorCode         ierr;
2942 
2943   PetscFunctionBegin;
2944   /* TODO: add missing checks */
2945   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2946   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2947   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2948   PetscValidLogicalCollectiveInt(mat,nis,7);
2949   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2950   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2951   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2952   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2953   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2954   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2955   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2956   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2957     PetscInt mrows,mcols,mnrows,mncols;
2958     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2959     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2960     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2961     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2962     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2963     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2964   }
2965   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2966   PetscValidLogicalCollectiveInt(mat,bs,0);
2967   /* prepare IS for sending if not provided */
2968   if (!is_sends) {
2969     PetscBool pcontig = PETSC_TRUE;
2970     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
2971     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
2972   } else {
2973     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2974     is_sends_internal = is_sends;
2975   }
2976 
2977   /* get pointer of MATIS data */
2978   matis = (Mat_IS*)mat->data;
2979 
2980   /* get comm */
2981   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2982 
2983   /* compute number of sends */
2984   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2985   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2986 
2987   /* compute number of receives */
2988   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2989   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
2990   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2991   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2992   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2993   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2994   ierr = PetscFree(iflags);CHKERRQ(ierr);
2995 
2996   /* restrict comm if requested */
2997   subcomm = 0;
2998   destroy_mat = PETSC_FALSE;
2999   if (restrict_comm) {
3000     PetscMPIInt color,rank,subcommsize;
3001     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3002     color = 0;
3003     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3004     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3005     subcommsize = commsize - subcommsize;
3006     /* check if reuse has been requested */
3007     if (reuse == MAT_REUSE_MATRIX) {
3008       if (*mat_n) {
3009         PetscMPIInt subcommsize2;
3010         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3011         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3012         comm_n = PetscObjectComm((PetscObject)*mat_n);
3013       } else {
3014         comm_n = PETSC_COMM_SELF;
3015       }
3016     } else { /* MAT_INITIAL_MATRIX */
3017       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3018       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3019       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3020       comm_n = subcomm->comm;
3021     }
3022     /* flag to destroy *mat_n if not significative */
3023     if (color) destroy_mat = PETSC_TRUE;
3024   } else {
3025     comm_n = comm;
3026   }
3027 
3028   /* prepare send/receive buffers */
3029   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3030   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3031   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3032   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3033   if (nis) {
3034     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3035     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3036   }
3037 
3038   /* Get data from local matrices */
3039   if (!isdense) {
3040     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3041     /* TODO: See below some guidelines on how to prepare the local buffers */
3042     /*
3043        send_buffer_vals should contain the raw values of the local matrix
3044        send_buffer_idxs should contain:
3045        - MatType_PRIVATE type
3046        - PetscInt        size_of_l2gmap
3047        - PetscInt        global_row_indices[size_of_l2gmap]
3048        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3049     */
3050   } else {
3051     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3052     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3053     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
3054     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3055     send_buffer_idxs[1] = i;
3056     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3057     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3058     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3059     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3060     for (i=0;i<n_sends;i++) {
3061       ilengths_vals[is_indices[i]] = len*len;
3062       ilengths_idxs[is_indices[i]] = len+2;
3063     }
3064   }
3065   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3066   /* additional is (if any) */
3067   if (nis) {
3068     PetscMPIInt psum;
3069     PetscInt j;
3070     for (j=0,psum=0;j<nis;j++) {
3071       PetscInt plen;
3072       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3073       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3074       psum += len+1; /* indices + lenght */
3075     }
3076     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3077     for (j=0,psum=0;j<nis;j++) {
3078       PetscInt plen;
3079       const PetscInt *is_array_idxs;
3080       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3081       send_buffer_idxs_is[psum] = plen;
3082       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3083       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3084       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3085       psum += plen+1; /* indices + lenght */
3086     }
3087     for (i=0;i<n_sends;i++) {
3088       ilengths_idxs_is[is_indices[i]] = psum;
3089     }
3090     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3091   }
3092 
3093   buf_size_idxs = 0;
3094   buf_size_vals = 0;
3095   buf_size_idxs_is = 0;
3096   for (i=0;i<n_recvs;i++) {
3097     buf_size_idxs += (PetscInt)olengths_idxs[i];
3098     buf_size_vals += (PetscInt)olengths_vals[i];
3099     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3100   }
3101   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3102   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3103   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3104 
3105   /* get new tags for clean communications */
3106   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3107   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3108   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3109 
3110   /* allocate for requests */
3111   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3112   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3113   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3114   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3115   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3116   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3117 
3118   /* communications */
3119   ptr_idxs = recv_buffer_idxs;
3120   ptr_vals = recv_buffer_vals;
3121   ptr_idxs_is = recv_buffer_idxs_is;
3122   for (i=0;i<n_recvs;i++) {
3123     source_dest = onodes[i];
3124     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3125     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3126     ptr_idxs += olengths_idxs[i];
3127     ptr_vals += olengths_vals[i];
3128     if (nis) {
3129       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3130       ptr_idxs_is += olengths_idxs_is[i];
3131     }
3132   }
3133   for (i=0;i<n_sends;i++) {
3134     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3135     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3136     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3137     if (nis) {
3138       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3139     }
3140   }
3141   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3142   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3143 
3144   /* assemble new l2g map */
3145   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3146   ptr_idxs = recv_buffer_idxs;
3147   new_local_rows = 0;
3148   for (i=0;i<n_recvs;i++) {
3149     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3150     ptr_idxs += olengths_idxs[i];
3151   }
3152   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3153   ptr_idxs = recv_buffer_idxs;
3154   new_local_rows = 0;
3155   for (i=0;i<n_recvs;i++) {
3156     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3157     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3158     ptr_idxs += olengths_idxs[i];
3159   }
3160   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3161   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3162   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3163 
3164   /* infer new local matrix type from received local matrices type */
3165   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3166   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3167   if (n_recvs) {
3168     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3169     ptr_idxs = recv_buffer_idxs;
3170     for (i=0;i<n_recvs;i++) {
3171       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3172         new_local_type_private = MATAIJ_PRIVATE;
3173         break;
3174       }
3175       ptr_idxs += olengths_idxs[i];
3176     }
3177     switch (new_local_type_private) {
3178       case MATDENSE_PRIVATE:
3179         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3180           new_local_type = MATSEQAIJ;
3181           bs = 1;
3182         } else { /* if I receive only 1 dense matrix */
3183           new_local_type = MATSEQDENSE;
3184           bs = 1;
3185         }
3186         break;
3187       case MATAIJ_PRIVATE:
3188         new_local_type = MATSEQAIJ;
3189         bs = 1;
3190         break;
3191       case MATBAIJ_PRIVATE:
3192         new_local_type = MATSEQBAIJ;
3193         break;
3194       case MATSBAIJ_PRIVATE:
3195         new_local_type = MATSEQSBAIJ;
3196         break;
3197       default:
3198         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3199         break;
3200     }
3201   } else { /* by default, new_local_type is seqdense */
3202     new_local_type = MATSEQDENSE;
3203     bs = 1;
3204   }
3205 
3206   /* create MATIS object if needed */
3207   if (reuse == MAT_INITIAL_MATRIX) {
3208     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3209     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3210   } else {
3211     /* it also destroys the local matrices */
3212     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3213   }
3214   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3215   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3216 
3217   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3218 
3219   /* Global to local map of received indices */
3220   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3221   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3222   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3223 
3224   /* restore attributes -> type of incoming data and its size */
3225   buf_size_idxs = 0;
3226   for (i=0;i<n_recvs;i++) {
3227     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3228     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3229     buf_size_idxs += (PetscInt)olengths_idxs[i];
3230   }
3231   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3232 
3233   /* set preallocation */
3234   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3235   if (!newisdense) {
3236     PetscInt *new_local_nnz=0;
3237 
3238     ptr_vals = recv_buffer_vals;
3239     ptr_idxs = recv_buffer_idxs_local;
3240     if (n_recvs) {
3241       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3242     }
3243     for (i=0;i<n_recvs;i++) {
3244       PetscInt j;
3245       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3246         for (j=0;j<*(ptr_idxs+1);j++) {
3247           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3248         }
3249       } else {
3250         /* TODO */
3251       }
3252       ptr_idxs += olengths_idxs[i];
3253     }
3254     if (new_local_nnz) {
3255       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3256       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3257       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3258       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3259       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3260       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3261     } else {
3262       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3263     }
3264     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3265   } else {
3266     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3267   }
3268 
3269   /* set values */
3270   ptr_vals = recv_buffer_vals;
3271   ptr_idxs = recv_buffer_idxs_local;
3272   for (i=0;i<n_recvs;i++) {
3273     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3274       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3275       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3276       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3277       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3278       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3279     } else {
3280       /* TODO */
3281     }
3282     ptr_idxs += olengths_idxs[i];
3283     ptr_vals += olengths_vals[i];
3284   }
3285   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3290   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3291 
3292 #if 0
3293   if (!restrict_comm) { /* check */
3294     Vec       lvec,rvec;
3295     PetscReal infty_error;
3296 
3297     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3298     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3299     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3300     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3301     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3302     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3303     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3304     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3305     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3306   }
3307 #endif
3308 
3309   /* assemble new additional is (if any) */
3310   if (nis) {
3311     PetscInt **temp_idxs,*count_is,j,psum;
3312 
3313     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3314     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3315     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3316     ptr_idxs = recv_buffer_idxs_is;
3317     psum = 0;
3318     for (i=0;i<n_recvs;i++) {
3319       for (j=0;j<nis;j++) {
3320         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3321         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3322         psum += plen;
3323         ptr_idxs += plen+1; /* shift pointer to received data */
3324       }
3325     }
3326     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3327     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3328     for (i=1;i<nis;i++) {
3329       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3330     }
3331     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3332     ptr_idxs = recv_buffer_idxs_is;
3333     for (i=0;i<n_recvs;i++) {
3334       for (j=0;j<nis;j++) {
3335         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3336         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3337         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3338         ptr_idxs += plen+1; /* shift pointer to received data */
3339       }
3340     }
3341     for (i=0;i<nis;i++) {
3342       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3343       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3344       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3345     }
3346     ierr = PetscFree(count_is);CHKERRQ(ierr);
3347     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3348     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3349   }
3350   /* free workspace */
3351   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3352   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3353   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3354   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3355   if (isdense) {
3356     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3357     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3358   } else {
3359     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3360   }
3361   if (nis) {
3362     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3363     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3364   }
3365   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3366   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3367   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3368   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3369   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3370   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3371   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3372   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3373   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3374   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3375   ierr = PetscFree(onodes);CHKERRQ(ierr);
3376   if (nis) {
3377     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3378     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3379     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3380   }
3381   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3382   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3383     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3384     for (i=0;i<nis;i++) {
3385       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3386     }
3387   }
3388   PetscFunctionReturn(0);
3389 }
3390 
3391 /* temporary hack into ksp private data structure */
3392 #include <petsc-private/kspimpl.h>
3393 
3394 #undef __FUNCT__
3395 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3396 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3397 {
3398   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3399   PC_IS                  *pcis = (PC_IS*)pc->data;
3400   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3401   MatNullSpace           CoarseNullSpace=NULL;
3402   ISLocalToGlobalMapping coarse_islg;
3403   IS                     coarse_is,*isarray;
3404   PetscInt               i,im_active=-1,active_procs=-1;
3405   PetscInt               nis,nisdofs,nisneu;
3406   PC                     pc_temp;
3407   PCType                 coarse_pc_type;
3408   KSPType                coarse_ksp_type;
3409   PetscBool              multilevel_requested,multilevel_allowed;
3410   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3411   Mat                    t_coarse_mat_is;
3412   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3413   PetscMPIInt            all_procs;
3414   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3415   PetscBool              compute_vecs = PETSC_FALSE;
3416   PetscScalar            *array;
3417   PetscErrorCode         ierr;
3418 
3419   PetscFunctionBegin;
3420   /* Assign global numbering to coarse dofs */
3421   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3422     compute_vecs = PETSC_TRUE;
3423     PetscInt ocoarse_size;
3424     ocoarse_size = pcbddc->coarse_size;
3425     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3426     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3427     /* see if we can avoid some work */
3428     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3429       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3430         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3431         coarse_reuse = PETSC_FALSE;
3432       } else { /* we can safely reuse already computed coarse matrix */
3433         coarse_reuse = PETSC_TRUE;
3434       }
3435     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3436       coarse_reuse = PETSC_FALSE;
3437     }
3438     /* reset any subassembling information */
3439     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3440     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3441   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3442     coarse_reuse = PETSC_TRUE;
3443   }
3444 
3445   /* count "active" (i.e. with positive local size) and "void" processes */
3446   im_active = !!(pcis->n);
3447   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3448   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3449   void_procs = all_procs-active_procs;
3450   csin_type_simple = PETSC_TRUE;
3451   redist = PETSC_FALSE;
3452   if (pcbddc->current_level && void_procs) {
3453     csin_ml = PETSC_TRUE;
3454     ncoarse_ml = void_procs;
3455     csin_ds = PETSC_TRUE;
3456     ncoarse_ds = void_procs;
3457   } else {
3458     csin_ml = PETSC_FALSE;
3459     ncoarse_ml = all_procs;
3460     if (void_procs) {
3461       csin_ds = PETSC_TRUE;
3462       ncoarse_ds = void_procs;
3463       csin_type_simple = PETSC_FALSE;
3464     } else {
3465       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3466         csin_ds = PETSC_TRUE;
3467         ncoarse_ds = pcbddc->redistribute_coarse;
3468         redist = PETSC_TRUE;
3469       } else {
3470         csin_ds = PETSC_FALSE;
3471         ncoarse_ds = all_procs;
3472       }
3473     }
3474   }
3475 
3476   /*
3477     test if we can go multilevel: three conditions must be satisfied:
3478     - we have not exceeded the number of levels requested
3479     - we can actually subassemble the active processes
3480     - we can find a suitable number of MPI processes where we can place the subassembled problem
3481   */
3482   multilevel_allowed = PETSC_FALSE;
3483   multilevel_requested = PETSC_FALSE;
3484   if (pcbddc->current_level < pcbddc->max_levels) {
3485     multilevel_requested = PETSC_TRUE;
3486     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3487       multilevel_allowed = PETSC_FALSE;
3488     } else {
3489       multilevel_allowed = PETSC_TRUE;
3490     }
3491   }
3492   /* determine number of process partecipating to coarse solver */
3493   if (multilevel_allowed) {
3494     ncoarse = ncoarse_ml;
3495     csin = csin_ml;
3496   } else {
3497     ncoarse = ncoarse_ds;
3498     csin = csin_ds;
3499   }
3500 
3501   /* creates temporary l2gmap and IS for coarse indexes */
3502   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3503   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3504 
3505   /* creates temporary MATIS object for coarse matrix */
3506   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3507   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3508   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3509   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3510 #if 0
3511   {
3512     PetscViewer viewer;
3513     char filename[256];
3514     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3515     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3516     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3517     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3518     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3519   }
3520 #endif
3521   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3522   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3523   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3524   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3525   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3526 
3527   /* compute dofs splitting and neumann boundaries for coarse dofs */
3528   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3529     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3530     const PetscInt         *idxs;
3531     ISLocalToGlobalMapping tmap;
3532 
3533     /* create map between primal indices (in local representative ordering) and local primal numbering */
3534     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3535     /* allocate space for temporary storage */
3536     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3537     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3538     /* allocate for IS array */
3539     nisdofs = pcbddc->n_ISForDofsLocal;
3540     nisneu = !!pcbddc->NeumannBoundariesLocal;
3541     nis = nisdofs + nisneu;
3542     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3543     /* dofs splitting */
3544     for (i=0;i<nisdofs;i++) {
3545       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3546       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3547       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3548       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3549       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3550       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3551       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3552       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3553     }
3554     /* neumann boundaries */
3555     if (pcbddc->NeumannBoundariesLocal) {
3556       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3557       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3558       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3559       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3560       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3561       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3562       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3563       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3564     }
3565     /* free memory */
3566     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3567     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3568     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3569   } else {
3570     nis = 0;
3571     nisdofs = 0;
3572     nisneu = 0;
3573     isarray = NULL;
3574   }
3575   /* destroy no longer needed map */
3576   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3577 
3578   /* restrict on coarse candidates (if needed) */
3579   coarse_mat_is = NULL;
3580   if (csin) {
3581     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3582       if (redist) {
3583         PetscMPIInt rank;
3584         PetscInt spc,n_spc_p1,dest[1];
3585 
3586         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3587         spc = all_procs/pcbddc->redistribute_coarse;
3588         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3589         if (rank > n_spc_p1*(spc+1)-1) {
3590           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3591         } else {
3592           dest[0] = rank/(spc+1);
3593         }
3594         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3595         ierr = ISView(pcbddc->coarse_subassembling_init,NULL);CHKERRQ(ierr);
3596       } else {
3597         PetscInt j,tissize,*nisindices;
3598         PetscInt *coarse_candidates;
3599         const PetscInt* tisindices;
3600         /* get coarse candidates' ranks in pc communicator */
3601         ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3602         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3603         for (i=0,j=0;i<all_procs;i++) {
3604           if (!coarse_candidates[i]) {
3605             coarse_candidates[j]=i;
3606             j++;
3607           }
3608         }
3609         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3610         /* get a suitable subassembling pattern */
3611         if (csin_type_simple) {
3612           PetscMPIInt rank;
3613           PetscInt    issize,isidx;
3614           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3615           if (im_active) {
3616             issize = 1;
3617             isidx = (PetscInt)rank;
3618           } else {
3619             issize = 0;
3620             isidx = -1;
3621           }
3622           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3623         } else {
3624           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3625         }
3626         if (pcbddc->dbg_flag) {
3627           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3628           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3629           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3630           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3631           for (i=0;i<j;i++) {
3632             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3633           }
3634           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3635           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3636         }
3637         /* shift the pattern on coarse candidates */
3638         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3639         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3640         ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3641         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3642         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3643         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3644         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3645       }
3646     }
3647     if (pcbddc->dbg_flag) {
3648       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3649       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3650       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3651       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3652     }
3653     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3654     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3655   } else {
3656     if (pcbddc->dbg_flag) {
3657       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3658       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3659       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3660     }
3661     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3662     coarse_mat_is = t_coarse_mat_is;
3663   }
3664 
3665   /* create local to global scatters for coarse problem */
3666   if (compute_vecs) {
3667     PetscInt lrows;
3668     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3669     if (coarse_mat_is) {
3670       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3671     } else {
3672       lrows = 0;
3673     }
3674     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3675     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3676     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3677     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3678     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3679   }
3680   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3681   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3682 
3683   /* set defaults for coarse KSP and PC */
3684   if (multilevel_allowed) {
3685     coarse_ksp_type = KSPRICHARDSON;
3686     coarse_pc_type = PCBDDC;
3687   } else {
3688     coarse_ksp_type = KSPPREONLY;
3689     coarse_pc_type = PCREDUNDANT;
3690   }
3691 
3692   /* print some info if requested */
3693   if (pcbddc->dbg_flag) {
3694     if (!multilevel_allowed) {
3695       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3696       if (multilevel_requested) {
3697         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3698       } else if (pcbddc->max_levels) {
3699         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3700       }
3701       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3702     }
3703   }
3704 
3705   /* create the coarse KSP object only once with defaults */
3706   if (coarse_mat_is) {
3707     MatReuse coarse_mat_reuse;
3708     PetscViewer dbg_viewer = NULL;
3709     if (pcbddc->dbg_flag) {
3710       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3711       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3712     }
3713     if (!pcbddc->coarse_ksp) {
3714       char prefix[256],str_level[16];
3715       size_t len;
3716       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3717       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3718       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3719       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3720       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3721       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3722       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3723       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3724       /* prefix */
3725       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3726       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3727       if (!pcbddc->current_level) {
3728         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3729         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3730       } else {
3731         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3732         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3733         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3734         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3735         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3736         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3737       }
3738       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3739       /* allow user customization */
3740       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3741       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3742     }
3743 
3744     /* get some info after set from options */
3745     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3746     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3747     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3748     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3749     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3750       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3751       isbddc = PETSC_FALSE;
3752     }
3753     if (isredundant) {
3754       KSP inner_ksp;
3755       PC inner_pc;
3756       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3757       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3758       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3759     }
3760 
3761     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3762     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3763     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3764     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3765     if (nisdofs) {
3766       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3767       for (i=0;i<nisdofs;i++) {
3768         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3769       }
3770     }
3771     if (nisneu) {
3772       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3773       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3774     }
3775 
3776     /* assemble coarse matrix */
3777     if (coarse_reuse) {
3778       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3779       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3780       coarse_mat_reuse = MAT_REUSE_MATRIX;
3781     } else {
3782       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3783     }
3784     if (isbddc || isnn) {
3785       if (pcbddc->coarsening_ratio > 1) {
3786         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3787           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3788           if (pcbddc->dbg_flag) {
3789             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3790             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3791             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3792             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3793           }
3794         }
3795         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3796       } else {
3797         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3798         coarse_mat = coarse_mat_is;
3799       }
3800     } else {
3801       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3802     }
3803     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3804 
3805     /* propagate symmetry info to coarse matrix */
3806     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3807     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3808 
3809     /* set operators */
3810     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3811     if (pcbddc->dbg_flag) {
3812       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3813     }
3814   } else { /* processes non partecipating to coarse solver (if any) */
3815     coarse_mat = 0;
3816   }
3817   ierr = PetscFree(isarray);CHKERRQ(ierr);
3818 #if 0
3819   {
3820     PetscViewer viewer;
3821     char filename[256];
3822     sprintf(filename,"coarse_mat.m");
3823     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3824     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3825     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3826     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3827   }
3828 #endif
3829 
3830   /* Compute coarse null space (special handling by BDDC only) */
3831   if (pcbddc->NullSpace) {
3832     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3833   }
3834 
3835   if (pcbddc->coarse_ksp) {
3836     Vec crhs,csol;
3837     PetscBool ispreonly;
3838     if (CoarseNullSpace) {
3839       if (isbddc) {
3840         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3841       } else {
3842         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3843       }
3844     }
3845     /* setup coarse ksp */
3846     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3847     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3848     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3849     /* hack */
3850     if (!csol) {
3851       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3852     }
3853     if (!crhs) {
3854       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3855     }
3856     /* Check coarse problem if in debug mode or if solving with an iterative method */
3857     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3858     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3859       KSP       check_ksp;
3860       KSPType   check_ksp_type;
3861       PC        check_pc;
3862       Vec       check_vec,coarse_vec;
3863       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3864       PetscInt  its;
3865       PetscBool compute_eigs;
3866       PetscReal *eigs_r,*eigs_c;
3867       PetscInt  neigs;
3868       const char *prefix;
3869 
3870       /* Create ksp object suitable for estimation of extreme eigenvalues */
3871       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3872       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3873       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3874       if (ispreonly) {
3875         check_ksp_type = KSPPREONLY;
3876         compute_eigs = PETSC_FALSE;
3877       } else {
3878         check_ksp_type = KSPGMRES;
3879         compute_eigs = PETSC_TRUE;
3880       }
3881       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3882       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3883       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3884       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3885       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3886       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3887       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3888       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3889       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3890       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3891       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3892       /* create random vec */
3893       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3894       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3895       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3896       if (CoarseNullSpace) {
3897         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3898       }
3899       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3900       /* solve coarse problem */
3901       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3902       if (CoarseNullSpace) {
3903         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3904       }
3905       /* set eigenvalue estimation if preonly has not been requested */
3906       if (compute_eigs) {
3907         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3908         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3909         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3910         lambda_max = eigs_r[neigs-1];
3911         lambda_min = eigs_r[0];
3912         if (pcbddc->use_coarse_estimates) {
3913           if (lambda_max>lambda_min) {
3914             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3915             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3916           }
3917         }
3918       }
3919 
3920       /* check coarse problem residual error */
3921       if (pcbddc->dbg_flag) {
3922         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3923         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3924         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3925         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3926         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3927         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3928         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3929         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3930         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3931         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3932         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3933         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3934         if (compute_eigs) {
3935           PetscReal lambda_max_s,lambda_min_s;
3936           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
3937           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3938           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3939           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3940           for (i=0;i<neigs;i++) {
3941             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3942           }
3943         }
3944         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3945         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3946       }
3947       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3948       if (compute_eigs) {
3949         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3950         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3951       }
3952     }
3953   }
3954   /* print additional info */
3955   if (pcbddc->dbg_flag) {
3956     /* waits until all processes reaches this point */
3957     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3958     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3959     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3960   }
3961 
3962   /* free memory */
3963   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3964   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 #undef __FUNCT__
3969 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3970 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3971 {
3972   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3973   PC_IS*         pcis = (PC_IS*)pc->data;
3974   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3975   PetscInt       i,coarse_size;
3976   PetscInt       *local_primal_indices;
3977   PetscErrorCode ierr;
3978 
3979   PetscFunctionBegin;
3980   /* Compute global number of coarse dofs */
3981   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3982     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3983   }
3984   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3985 
3986   /* check numbering */
3987   if (pcbddc->dbg_flag) {
3988     PetscScalar coarsesum,*array;
3989     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
3990 
3991     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3993     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3994     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3995     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3996     for (i=0;i<pcbddc->local_primal_size;i++) {
3997       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3998     }
3999     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4000     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4001     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4002     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4003     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4004     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4005     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4006     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4007     for (i=0;i<pcis->n;i++) {
4008       if (array[i] == 1.0) {
4009         set_error = PETSC_TRUE;
4010         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4011       }
4012     }
4013     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4014     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4015     for (i=0;i<pcis->n;i++) {
4016       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4017     }
4018     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4019     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4020     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4021     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4022     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4023     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4024     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4025       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4026       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4027       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4028       for (i=0;i<pcbddc->local_primal_size;i++) {
4029         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4030       }
4031       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4032     }
4033     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4034     if (set_error_reduced) {
4035       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4036     }
4037   }
4038   /* get back data */
4039   *coarse_size_n = coarse_size;
4040   *local_primal_indices_n = local_primal_indices;
4041   PetscFunctionReturn(0);
4042 }
4043 
4044 #undef __FUNCT__
4045 #define __FUNCT__ "PCBDDCGlobalToLocal"
4046 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4047 {
4048   IS             localis_t;
4049   PetscInt       i,lsize,*idxs,n;
4050   PetscScalar    *vals;
4051   PetscErrorCode ierr;
4052 
4053   PetscFunctionBegin;
4054   /* get indices in local ordering exploiting local to global map */
4055   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4056   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
4057   for (i=0;i<lsize;i++) vals[i] = 1.0;
4058   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4059   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4060   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4061   if (idxs) { /* multilevel guard */
4062     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4063   }
4064   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4065   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4066   ierr = PetscFree(vals);CHKERRQ(ierr);
4067   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4068   /* now compute set in local ordering */
4069   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4070   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4071   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4072   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4073   for (i=0,lsize=0;i<n;i++) {
4074     if (PetscRealPart(vals[i]) > 0.5) {
4075       lsize++;
4076     }
4077   }
4078   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
4079   for (i=0,lsize=0;i<n;i++) {
4080     if (PetscRealPart(vals[i]) > 0.5) {
4081       idxs[lsize++] = i;
4082     }
4083   }
4084   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4085   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4086   *localis = localis_t;
4087   PetscFunctionReturn(0);
4088 }
4089 
4090 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4091 #undef __FUNCT__
4092 #define __FUNCT__ "PCBDDCMatMult_Private"
4093 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4094 {
4095   PCBDDCChange_ctx change_ctx;
4096   PetscErrorCode   ierr;
4097 
4098   PetscFunctionBegin;
4099   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4100   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4101   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4102   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4103   PetscFunctionReturn(0);
4104 }
4105 
4106 #undef __FUNCT__
4107 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4108 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4109 {
4110   PCBDDCChange_ctx change_ctx;
4111   PetscErrorCode   ierr;
4112 
4113   PetscFunctionBegin;
4114   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4115   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4116   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4117   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4118   PetscFunctionReturn(0);
4119 }
4120