xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 684f69889ccadd76600d17944a5b5da27aff2493)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCSetUpSolvers"
10 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscScalar    *coarse_submat_vals;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local neumann solver ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "PCBDDCResetSolvers"
84 PetscErrorCode PCBDDCResetSolvers(PC pc)
85 {
86   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
87   PetscErrorCode ierr;
88 
89   PetscFunctionBegin;
90   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
91   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
112   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
113   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
119 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
120 {
121   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
122   PC_IS          *pcis = (PC_IS*)pc->data;
123   VecType        impVecType;
124   PetscInt       n_constraints,n_R,old_size;
125   PetscErrorCode ierr;
126 
127   PetscFunctionBegin;
128   if (!pcbddc->ConstraintMatrix) {
129     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
130   }
131   /* get sizes */
132   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
133   n_R = pcis->n-pcbddc->n_actual_vertices;
134   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
135   /* local work vectors (try to avoid unneeded work)*/
136   /* R nodes */
137   old_size = -1;
138   if (pcbddc->vec1_R) {
139     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
140   }
141   if (n_R != old_size) {
142     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
144     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
146     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
147     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
148   }
149   /* local primal dofs */
150   old_size = -1;
151   if (pcbddc->vec1_P) {
152     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
153   }
154   if (pcbddc->local_primal_size != old_size) {
155     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
157     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
158     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
159   }
160   /* local explicit constraints */
161   old_size = -1;
162   if (pcbddc->vec1_C) {
163     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
164   }
165   if (n_constraints && n_constraints != old_size) {
166     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
168     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
169     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "PCBDDCSetUpCorrection"
176 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
177 {
178   PetscErrorCode         ierr;
179   /* pointers to pcis and pcbddc */
180   PC_IS*                 pcis = (PC_IS*)pc->data;
181   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
182   /* submatrices of local problem */
183   Mat                    A_RV,A_VR,A_VV;
184   /* working matrices */
185   Mat                    M1,M2,M3,C_CR;
186   /* working vectors */
187   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
188   /* additional working stuff */
189   IS                     is_aux;
190   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
191   const PetscScalar      *array,*row_cmat_values;
192   const PetscInt         *row_cmat_indices,*idx_R_local;
193   PetscInt               *idx_V_B,*auxindices;
194   PetscInt               n_vertices,n_constraints,size_of_constraint;
195   PetscInt               i,j,n_R,n_D,n_B;
196   PetscBool              unsymmetric_check;
197   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
198   MatType                impMatType;
199   /* some shortcuts to scalars */
200   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
201   /* for debugging purposes */
202   PetscReal              *coarsefunctions_errors,*constraints_errors;
203 
204   PetscFunctionBegin;
205   /* get number of vertices (corners plus constraints with change of basis)
206      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
207   n_vertices = pcbddc->n_actual_vertices;
208   n_constraints = pcbddc->local_primal_size-n_vertices;
209   /* Set Non-overlapping dimensions */
210   n_B = pcis->n_B; n_D = pcis->n - n_B;
211   n_R = pcis->n-n_vertices;
212 
213   /* Set types for local objects needed by BDDC precondtioner */
214   impMatType = MATSEQDENSE;
215 
216   /* Allocating some extra storage just to be safe */
217   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
218   for (i=0;i<pcis->n;i++) auxindices[i]=i;
219 
220   /* vertices in boundary numbering */
221   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
222   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
223   if (i != n_vertices) {
224     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
225   }
226 
227   /* Precompute stuffs needed for preprocessing and application of BDDC*/
228   if (n_constraints) {
229     /* see if we can save some allocations */
230     if (pcbddc->local_auxmat2) {
231       PetscInt on_R,on_constraints;
232       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
233       if (on_R != n_R || on_constraints != n_constraints) {
234         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
235         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
236       }
237     }
238     /* work vectors */
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
240     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
241     /* auxiliary matrices */
242     if (!pcbddc->local_auxmat2) {
243       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
244       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
245       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
246       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
247     }
248 
249     /* Extract constraints on R nodes: C_{CR}  */
250     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
251     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
252     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
253 
254     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
255     for (i=0;i<n_constraints;i++) {
256       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
257       /* Get row of constraint matrix in R numbering */
258       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
260       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
262       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
263       /* Solve for row of constraint matrix in R numbering */
264       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
265       /* Set values in local_auxmat2 */
266       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
268       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269     }
270     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
273 
274     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
275     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
276     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
277     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
278     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
279     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
280     ierr = MatSetUp(M1);CHKERRQ(ierr);
281     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
282     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
283     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
284     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
285     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
286     ierr = MatDestroy(&M2);CHKERRQ(ierr);
287     ierr = MatDestroy(&M3);CHKERRQ(ierr);
288     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
289     if (!pcbddc->local_auxmat1) {
290       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     } else {
292       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     }
294   }
295 
296   /* Get submatrices from subdomain matrix */
297   if (n_vertices) {
298     PetscInt ibs,mbs;
299     PetscBool issbaij;
300     Mat newmat;
301 
302     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
303     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
304     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
305     if (ibs != mbs) { /* need to convert to SEQAIJ */
306       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
310       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
311     } else {
312       /* this is safe */
313       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
314       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
315       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
316         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
317         /* which of the two approaches is faster? */
318         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
320         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
322         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
323       } else {
324         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
325         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
326       }
327     }
328     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
329     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
330     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
331   }
332 
333   /* Matrix of coarse basis functions (local) */
334   if (pcbddc->coarse_phi_B) {
335     PetscInt on_B,on_primal;
336     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
337     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
338       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
339       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
340     }
341   }
342   if (pcbddc->coarse_phi_D) {
343     PetscInt on_D,on_primal;
344     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
345     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
346       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
347       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
348     }
349   }
350   if (!pcbddc->coarse_phi_B) {
351     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
352     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
353     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
354     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
355   }
356   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
357     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
358     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
359     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
360     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
361   }
362 
363   if (pcbddc->dbg_flag) {
364     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
365     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
366     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
367   }
368   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
369   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
370 
371   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
372 
373   /* vertices */
374   for (i=0;i<n_vertices;i++) {
375     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
376     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
377     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
378     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
379     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
380     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
381     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
382     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
384     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
385     if (n_constraints) {
386       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
387       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
388       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
389     }
390     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
391     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
392 
393     /* Set values in coarse basis function and subdomain part of coarse_mat */
394     /* coarse basis functions */
395     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
396     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
400     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
402     if (pcbddc->switch_static || pcbddc->dbg_flag) {
403       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
405       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
407       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408     }
409     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
410     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
412     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     if (n_constraints) {
414       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
417     }
418 
419     /* check */
420     if (pcbddc->dbg_flag) {
421       /* assemble subdomain vector on local nodes */
422       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
423       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       if (n_R) {
425         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
426       }
427       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
428       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
429       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
430       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
431       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
432       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
433       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
434       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
435       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       if (n_constraints) {
437         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
438         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
439         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
440       }
441       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
443       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
444       /* check saddle point solution */
445       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
447       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
448       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
449       /* shift by the identity matrix */
450       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
451       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
453       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
454     }
455   }
456 
457   /* constraints */
458   for (i=0;i<n_constraints;i++) {
459     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
460     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
461     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
462     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
463     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
464     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
465     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
466     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
467     if (n_vertices) {
468       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
469     }
470     /* Set values in coarse basis function and subdomain part of coarse_mat */
471     /* coarse basis functions */
472     j = i+n_vertices; /* don't touch this! */
473     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
474     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
477     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
478     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
481       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
482       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
483       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
484       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
485     }
486     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
487     if (n_vertices) {
488       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
489       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
490       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
491     }
492     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
493     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
494     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
495 
496     if (pcbddc->dbg_flag) {
497       /* assemble subdomain vector on nodes */
498       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
499       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
500       if (n_R) {
501         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
502       }
503       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
504       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
505       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
506       /* assemble subdomain vector of lagrange multipliers */
507       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
508       if (n_vertices) {
509         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
510         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
511         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
512       }
513       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
514       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
515       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
516       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
517       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
518       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
519       /* check saddle point solution */
520       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
521       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
522       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
523       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
524       /* shift by the identity matrix */
525       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
526       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
527       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
528       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
529     }
530   }
531   /* call assembling routines for local coarse basis */
532   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534   if (pcbddc->switch_static || pcbddc->dbg_flag) {
535     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
537   }
538 
539   /* compute other basis functions for non-symmetric problems */
540   /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/
541   { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */
542     PetscBool setsym;
543     ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr);
544     if (!setsym) pcbddc->issym = PETSC_FALSE;
545   }
546 
547   if (!pcbddc->issym) {
548     if (!pcbddc->coarse_psi_B) {
549       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
550       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
551       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
552       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
553     }
554     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
555       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
556       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
557       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
558       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
559     }
560     for (i=0;i<pcbddc->local_primal_size;i++) {
561       if (n_constraints) {
562         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
563         for (j=0;j<n_constraints;j++) {
564           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
565         }
566         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
567         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
568       }
569       if (i<n_vertices) {
570         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
571         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
572         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
573         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
574         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
575         if (n_constraints) {
576           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
577         }
578       } else {
579         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
580       }
581       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
582       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
583       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
584       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
585       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
586       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
587       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
588       if (i<n_vertices) {
589         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
590       }
591       if (pcbddc->switch_static || pcbddc->dbg_flag) {
592         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
593         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
594         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
595         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
596         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
597       }
598 
599       if (pcbddc->dbg_flag) {
600         /* assemble subdomain vector on nodes */
601         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
602         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
603         if (n_R) {
604           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
605         }
606         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
607         if (i<n_vertices) {
608           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
609         }
610         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
611         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
612         /* assemble subdomain vector of lagrange multipliers */
613         for (j=0;j<pcbddc->local_primal_size;j++) {
614           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
615         }
616         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
617         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
618         /* check saddle point solution */
619         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
620         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
621         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
622         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
623         /* shift by the identity matrix */
624         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
625         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
626         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
627         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
628       }
629     }
630     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
631     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
632     if (pcbddc->switch_static || pcbddc->dbg_flag) {
633       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
634       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
635     }
636     unsymmetric_check = PETSC_TRUE;
637   } else { /* take references to already computed coarse basis */
638     unsymmetric_check = PETSC_FALSE;
639     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
640     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
641     if (pcbddc->coarse_phi_D) {
642       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
643       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
644     }
645   }
646   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
647   /* Checking coarse_sub_mat and coarse basis functios */
648   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
649   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
650   if (pcbddc->dbg_flag) {
651     Mat         coarse_sub_mat;
652     Mat         AUXMAT,TM1,TM2,TM3,TM4;
653     Mat         coarse_phi_D,coarse_phi_B;
654     Mat         coarse_psi_D,coarse_psi_B;
655     Mat         A_II,A_BB,A_IB,A_BI;
656     MatType     checkmattype=MATSEQAIJ;
657     PetscReal   real_value;
658 
659     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
660     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
661     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
662     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
663     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
664     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
665     if (unsymmetric_check) {
666       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
667       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
668     }
669     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
670 
671     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
672     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
673     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
674     if (unsymmetric_check) {
675       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
676       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
677       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
678       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
679       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
680       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
681       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
682       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
683       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
684       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
685       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
686       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
687     } else {
688       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
689       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
690       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
691       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
692       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
693       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
694       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
695       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
696     }
697     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
698     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
699     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
700     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
701     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
702     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
703     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
704     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
705     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
706     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
707     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
708     for (i=0;i<pcbddc->local_primal_size;i++) {
709       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
710     }
711     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
712     for (i=0;i<pcbddc->local_primal_size;i++) {
713       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
714     }
715     if (unsymmetric_check) {
716       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
717       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
718         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
719       }
720       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
721       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
722         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
723       }
724     }
725     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
727     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
730     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
731     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
732     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
733     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
734     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
735     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
736     if (unsymmetric_check) {
737       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
738       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
739     }
740     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
741     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
742     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
743     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
744   }
745   /* free memory */
746   if (n_vertices) {
747     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
748     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
749     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
750     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
751     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
752   }
753   if (n_constraints) {
754     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
755     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
756     ierr = MatDestroy(&M1);CHKERRQ(ierr);
757     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
758   }
759   ierr = PetscFree(auxindices);CHKERRQ(ierr);
760   /* get back data */
761   *coarse_submat_vals_n = coarse_submat_vals;
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
767 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
768 {
769   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
770   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
771   Mat            *change_mat_all;
772   IS             is_local,is_global;
773   PetscBool      sorted,isseqaij;
774   PetscInt       local_size,*idxs_perm;
775   PetscErrorCode ierr;
776 
777   PetscFunctionBegin;
778   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
779   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
780   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
781   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
782   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
783   ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr);
784   if (!sorted) {
785     const PetscInt *idxs;
786     PetscInt *idxs_sorted,i;
787 
788     ierr = PetscMalloc1(local_size,&idxs_perm);CHKERRQ(ierr);
789     ierr = PetscMalloc1(local_size,&idxs_sorted);CHKERRQ(ierr);
790     for (i=0;i<local_size;i++) {
791       idxs_perm[i] = i;
792     }
793     ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr);
794     ierr = PetscSortIntWithPermutation(local_size,idxs,idxs_perm);CHKERRQ(ierr);
795     for (i=0;i<local_size;i++) {
796       idxs_sorted[i] = idxs[idxs_perm[i]];
797     }
798     ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr);
799     ierr = ISDestroy(&is_global);CHKERRQ(ierr);
800     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr);
801   }
802 
803   /* get change of basis on the whole set of local dofs */
804   ierr = MatGetSubMatrices(ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr);
805 
806   if (!sorted) {
807     Mat      new_mat;
808     IS       is_perm;
809     PetscInt *idxs,i;
810 
811     ierr = PetscMalloc1(local_size,&idxs);CHKERRQ(ierr);
812     for (i=0;i<local_size;i++) {
813       idxs[idxs_perm[i]] = i;
814     }
815     ierr = PetscFree(idxs_perm);CHKERRQ(ierr);
816     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr);
817     ierr = ISSetPermutation(is_perm);CHKERRQ(ierr);
818     ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr);
819     ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr);
820     change_mat_all[0] = new_mat;
821     ierr = ISDestroy(&is_perm);CHKERRQ(ierr);
822   }
823 
824   /* check */
825   if (pcbddc->dbg_flag) {
826     Vec       x,x_change;
827     PetscReal error;
828 
829     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
830     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
831     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
832     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834     ierr = MatMult(change_mat_all[0],matis->x,matis->y);CHKERRQ(ierr);
835     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
836     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
837     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
838     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
839     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
840     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
841     ierr = VecDestroy(&x);CHKERRQ(ierr);
842     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
843   }
844 
845   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
846   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
847   if (isseqaij) {
848     ierr = MatPtAP(matis->A,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
849   } else {
850     Mat work_mat;
851     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
852     ierr = MatPtAP(work_mat,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
853     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
854   }
855   /*
856   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
857   ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
858   */
859   ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr);
860   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
866 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
867 {
868   PC_IS*         pcis = (PC_IS*)(pc->data);
869   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
870   IS             is_aux1,is_aux2;
871   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
872   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
873   PetscInt       vbs,bs;
874   PetscBT        bitmask;
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   /*
879     No need to setup local scatters if
880       - primal space is unchanged
881         AND
882       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
883         AND
884       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
885   */
886   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
887     PetscFunctionReturn(0);
888   }
889   /* destroy old objects */
890   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
891   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
892   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
893   /* Set Non-overlapping dimensions */
894   n_B = pcis->n_B; n_D = pcis->n - n_B;
895   n_vertices = pcbddc->n_actual_vertices;
896   /* create auxiliary bitmask */
897   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
898   for (i=0;i<n_vertices;i++) {
899     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
900   }
901 
902   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
903   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
904   for (i=0, n_R=0; i<pcis->n; i++) {
905     if (!PetscBTLookup(bitmask,i)) {
906       idx_R_local[n_R] = i;
907       n_R++;
908     }
909   }
910 
911   /* Block code */
912   vbs = 1;
913   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
914   if (bs>1 && !(n_vertices%bs)) {
915     PetscBool is_blocked = PETSC_TRUE;
916     PetscInt  *vary;
917     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
918     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
919     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
920     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
921     for (i=0; i<n_vertices; i++) {
922       if (vary[i]!=0 && vary[i]!=bs) {
923         is_blocked = PETSC_FALSE;
924         break;
925       }
926     }
927     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
928       vbs = bs;
929       for (i=0;i<n_R/vbs;i++) {
930         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
931       }
932     }
933     ierr = PetscFree(vary);CHKERRQ(ierr);
934   }
935   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
936   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
937 
938   /* print some info if requested */
939   if (pcbddc->dbg_flag) {
940     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
941     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
942     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
943     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
944     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
947     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
948   }
949 
950   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
951   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
952   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
953   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
954   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
955   for (i=0; i<n_D; i++) {
956     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
957   }
958   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
959   for (i=0, j=0; i<n_R; i++) {
960     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
961       aux_array1[j++] = i;
962     }
963   }
964   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
965   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
966   for (i=0, j=0; i<n_B; i++) {
967     if (!PetscBTLookup(bitmask,is_indices[i])) {
968       aux_array2[j++] = i;
969     }
970   }
971   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
972   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
973   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
974   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
975   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
976 
977   if (pcbddc->switch_static || pcbddc->dbg_flag) {
978     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
979     for (i=0, j=0; i<n_R; i++) {
980       if (PetscBTLookup(bitmask,idx_R_local[i])) {
981         aux_array1[j++] = i;
982       }
983     }
984     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
985     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
986     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
987   }
988   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
989   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
996 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
997 {
998   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
999   PC_IS          *pcis = (PC_IS*)pc->data;
1000   PC             pc_temp;
1001   Mat            A_RR;
1002   MatReuse       reuse;
1003   PetscScalar    m_one = -1.0;
1004   PetscReal      value;
1005   PetscInt       n_D,n_R,ibs,mbs;
1006   PetscBool      use_exact,use_exact_reduced,issbaij;
1007   PetscErrorCode ierr;
1008   /* prefixes stuff */
1009   char           dir_prefix[256],neu_prefix[256],str_level[16];
1010   size_t         len;
1011 
1012   PetscFunctionBegin;
1013 
1014   /* compute prefixes */
1015   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1016   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1017   if (!pcbddc->current_level) {
1018     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1019     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1020     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1021     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1022   } else {
1023     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1024     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1025     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1026     len -= 15; /* remove "pc_bddc_coarse_" */
1027     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1028     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1029     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1030     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1031     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1032     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1033     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1034     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1035   }
1036 
1037   /* DIRICHLET PROBLEM */
1038   if (dirichlet) {
1039     /* Matrix for Dirichlet problem is pcis->A_II */
1040     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1041     if (!pcbddc->ksp_D) { /* create object if not yet build */
1042       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1043       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1044       /* default */
1045       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1046       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1047       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1048       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1049       if (issbaij) {
1050         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1051       } else {
1052         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1053       }
1054       /* Allow user's customization */
1055       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1056       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1057     }
1058     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1059     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1060     if (!n_D) {
1061       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1062       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1063     }
1064     /* Set Up KSP for Dirichlet problem of BDDC */
1065     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1066     /* set ksp_D into pcis data */
1067     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1068     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1069     pcis->ksp_D = pcbddc->ksp_D;
1070   }
1071 
1072   /* NEUMANN PROBLEM */
1073   A_RR = 0;
1074   if (neumann) {
1075     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1076     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1077     if (pcbddc->ksp_R) { /* already created ksp */
1078       PetscInt nn_R;
1079       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1080       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1081       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1082       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1083         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1084         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1085         reuse = MAT_INITIAL_MATRIX;
1086       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1087         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1088           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1089           reuse = MAT_INITIAL_MATRIX;
1090         } else { /* safe to reuse the matrix */
1091           reuse = MAT_REUSE_MATRIX;
1092         }
1093       }
1094       /* last check */
1095       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1096         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1097         reuse = MAT_INITIAL_MATRIX;
1098       }
1099     } else { /* first time, so we need to create the matrix */
1100       reuse = MAT_INITIAL_MATRIX;
1101     }
1102     /* extract A_RR */
1103     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1104     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1105     if (ibs != mbs) {
1106       Mat newmat;
1107       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1108       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1109       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1110     } else {
1111       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1112     }
1113     if (!pcbddc->ksp_R) { /* create object if not present */
1114       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1115       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1116       /* default */
1117       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1118       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1119       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1120       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1121       if (issbaij) {
1122         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1123       } else {
1124         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1125       }
1126       /* Allow user's customization */
1127       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1128       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1129     }
1130     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1131     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1132     if (!n_R) {
1133       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1134       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1135     }
1136     /* Set Up KSP for Neumann problem of BDDC */
1137     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1138   }
1139 
1140   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1141   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1142     if (pcbddc->dbg_flag) {
1143       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1144       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1145       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1146     }
1147     if (dirichlet) { /* Dirichlet */
1148       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1149       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1150       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1151       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1152       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1153       /* need to be adapted? */
1154       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1155       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1156       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1157       /* print info */
1158       if (pcbddc->dbg_flag) {
1159         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1160         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1161       }
1162       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1163         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1164       }
1165     }
1166     if (neumann) { /* Neumann */
1167       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1168       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1169       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1170       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1171       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1172       /* need to be adapted? */
1173       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1174       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1175       /* print info */
1176       if (pcbddc->dbg_flag) {
1177         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1178         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1179       }
1180       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1181         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1182       }
1183     }
1184   }
1185   /* free Neumann problem's matrix */
1186   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1192 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1193 {
1194   PetscErrorCode ierr;
1195   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1196 
1197   PetscFunctionBegin;
1198   if (applytranspose) {
1199     if (pcbddc->local_auxmat1) {
1200       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1201       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1202     }
1203     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1204   } else {
1205     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1206     if (pcbddc->local_auxmat1) {
1207       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1208       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1209     }
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1215 #undef __FUNCT__
1216 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1217 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1218 {
1219   PetscErrorCode ierr;
1220   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1221   PC_IS*            pcis = (PC_IS*)  (pc->data);
1222   const PetscScalar zero = 0.0;
1223 
1224   PetscFunctionBegin;
1225   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1226   if (applytranspose) {
1227     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1228     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1229   } else {
1230     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1231     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1232   }
1233   /* start communications from local primal nodes to rhs of coarse solver */
1234   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1235   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237 
1238   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1239   /* TODO remove null space when doing multilevel */
1240   if (pcbddc->coarse_ksp) {
1241     if (applytranspose) {
1242       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1243     } else {
1244       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1245     }
1246   }
1247 
1248   /* Local solution on R nodes */
1249   if (pcis->n) {
1250     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1251     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1252     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1253     if (pcbddc->switch_static) {
1254       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1255       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1256     }
1257     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1258     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1259     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261     if (pcbddc->switch_static) {
1262       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264     }
1265   }
1266 
1267   /* communications from coarse sol to local primal nodes */
1268   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1269   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1270 
1271   /* Sum contributions from two levels */
1272   if (applytranspose) {
1273     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1274     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1275   } else {
1276     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1277     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1278   }
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1283 #undef __FUNCT__
1284 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1285 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1286 {
1287   PetscErrorCode ierr;
1288   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1289   PetscScalar    *array,*array2;
1290   Vec            from,to;
1291 
1292   PetscFunctionBegin;
1293   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1294     from = pcbddc->coarse_vec;
1295     to = pcbddc->vec1_P;
1296     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1297       Vec tvec;
1298       PetscInt lsize;
1299       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1300       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1301       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1302       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1303       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1304       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1305       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1306     }
1307   } else { /* from local to global -> put data in coarse right hand side */
1308     from = pcbddc->vec1_P;
1309     to = pcbddc->coarse_vec;
1310   }
1311   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1317 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1318 {
1319   PetscErrorCode ierr;
1320   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1321   PetscScalar    *array,*array2;
1322   Vec            from,to;
1323 
1324   PetscFunctionBegin;
1325   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1326     from = pcbddc->coarse_vec;
1327     to = pcbddc->vec1_P;
1328   } else { /* from local to global -> put data in coarse right hand side */
1329     from = pcbddc->vec1_P;
1330     to = pcbddc->coarse_vec;
1331   }
1332   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1333   if (smode == SCATTER_FORWARD) {
1334     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1335       Vec tvec;
1336       PetscInt lsize;
1337       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1338       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1339       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1340       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1341       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1342       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1343       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1344     }
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 /* uncomment for testing purposes */
1350 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1351 #undef __FUNCT__
1352 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1353 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1354 {
1355   PetscErrorCode    ierr;
1356   PC_IS*            pcis = (PC_IS*)(pc->data);
1357   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1358   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1359   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1360   MatType           impMatType=MATSEQAIJ;
1361   /* one and zero */
1362   PetscScalar       one=1.0,zero=0.0;
1363   /* space to store constraints and their local indices */
1364   PetscScalar       *temp_quadrature_constraint;
1365   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1366   /* iterators */
1367   PetscInt          i,j,k,total_counts,temp_start_ptr;
1368   /* stuff to store connected components stored in pcbddc->mat_graph */
1369   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1370   PetscInt          n_ISForFaces,n_ISForEdges;
1371   /* near null space stuff */
1372   MatNullSpace      nearnullsp;
1373   const Vec         *nearnullvecs;
1374   Vec               *localnearnullsp;
1375   PetscBool         nnsp_has_cnst;
1376   PetscInt          nnsp_size;
1377   PetscScalar       *array;
1378   /* BLAS integers */
1379   PetscBLASInt      lwork,lierr;
1380   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1381   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1382   /* LAPACK working arrays for SVD or POD */
1383   PetscBool         skip_lapack;
1384   PetscScalar       *work;
1385   PetscReal         *singular_vals;
1386 #if defined(PETSC_USE_COMPLEX)
1387   PetscReal         *rwork;
1388 #endif
1389 #if defined(PETSC_MISSING_LAPACK_GESVD)
1390   PetscBLASInt      Blas_one_2=1;
1391   PetscScalar       *temp_basis,*correlation_mat;
1392 #else
1393   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1394   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1395 #endif
1396   /* reuse */
1397   PetscInt          olocal_primal_size;
1398   PetscInt          *oprimal_indices_local_idxs;
1399   /* change of basis */
1400   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1401   PetscBool         boolforchange,qr_needed;
1402   PetscBT           touched,change_basis,qr_needed_idx;
1403   /* auxiliary stuff */
1404   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1405   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1406   PetscScalar       *temp_quadrature_constraint_work;
1407   /* some quantities */
1408   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1409   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1410 
1411 
1412   PetscFunctionBegin;
1413   /* Destroy Mat objects computed previously */
1414   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1415   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1416   /* Get index sets for faces, edges and vertices from graph */
1417   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1418   /* free unneeded index sets */
1419   if (!pcbddc->use_vertices) {
1420     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1421   }
1422   if (!pcbddc->use_edges) {
1423     for (i=0;i<n_ISForEdges;i++) {
1424       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1425     }
1426     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1427     n_ISForEdges = 0;
1428   }
1429   if (!pcbddc->use_faces) {
1430     for (i=0;i<n_ISForFaces;i++) {
1431       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1432     }
1433     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1434     n_ISForFaces = 0;
1435   }
1436   /* HACKS (the following two blocks of code) */
1437   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1438     pcbddc->use_change_of_basis = PETSC_TRUE;
1439     if (!ISForEdges) {
1440       pcbddc->use_change_on_faces = PETSC_TRUE;
1441     }
1442   }
1443   if (pcbddc->NullSpace) {
1444     /* use_change_of_basis should be consistent among processors */
1445     PetscBool tbool[2],gbool[2];
1446     tbool [0] = pcbddc->use_change_of_basis;
1447     tbool [1] = pcbddc->use_change_on_faces;
1448     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1449     pcbddc->use_change_of_basis = gbool[0];
1450     pcbddc->use_change_on_faces = gbool[1];
1451   }
1452   /* print some info */
1453   if (pcbddc->dbg_flag) {
1454     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1455     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1456     i = 0;
1457     if (ISForVertices) {
1458       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1459     }
1460     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1461     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1462     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1463     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1464   }
1465   /* check if near null space is attached to global mat */
1466   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1467   if (nearnullsp) {
1468     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1469     /* remove any stored info */
1470     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1471     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1472     /* store information for BDDC solver reuse */
1473     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1474     pcbddc->onearnullspace = nearnullsp;
1475     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1476     for (i=0;i<nnsp_size;i++) {
1477       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1478     }
1479   } else { /* if near null space is not provided BDDC uses constants by default */
1480     nnsp_size = 0;
1481     nnsp_has_cnst = PETSC_TRUE;
1482   }
1483   /* get max number of constraints on a single cc */
1484   max_constraints = nnsp_size;
1485   if (nnsp_has_cnst) max_constraints++;
1486 
1487   /*
1488        Evaluate maximum storage size needed by the procedure
1489        - temp_indices will contain start index of each constraint stored as follows
1490        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1491        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1492        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1493                                                                                                                                                          */
1494   total_counts = n_ISForFaces+n_ISForEdges;
1495   total_counts *= max_constraints;
1496   n_vertices = 0;
1497   if (ISForVertices) {
1498     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1499   }
1500   total_counts += n_vertices;
1501   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1502   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1503   total_counts = 0;
1504   max_size_of_constraint = 0;
1505   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1506     if (i<n_ISForEdges) {
1507       used_IS = &ISForEdges[i];
1508     } else {
1509       used_IS = &ISForFaces[i-n_ISForEdges];
1510     }
1511     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1512     total_counts += j;
1513     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1514   }
1515   total_counts *= max_constraints;
1516   total_counts += n_vertices;
1517   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1518   /* get local part of global near null space vectors */
1519   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1520   for (k=0;k<nnsp_size;k++) {
1521     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1522     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524   }
1525 
1526   /* whether or not to skip lapack calls */
1527   skip_lapack = PETSC_TRUE;
1528   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1529 
1530   /* allocate some auxiliary stuff */
1531   if (!skip_lapack || pcbddc->use_qr_single) {
1532     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1533   }
1534 
1535   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1536   if (!skip_lapack) {
1537     PetscScalar temp_work;
1538 
1539 #if defined(PETSC_MISSING_LAPACK_GESVD)
1540     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1541     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1542     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1543     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1544 #if defined(PETSC_USE_COMPLEX)
1545     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1546 #endif
1547     /* now we evaluate the optimal workspace using query with lwork=-1 */
1548     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1549     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1550     lwork = -1;
1551     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1552 #if !defined(PETSC_USE_COMPLEX)
1553     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1554 #else
1555     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1556 #endif
1557     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1558     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1559 #else /* on missing GESVD */
1560     /* SVD */
1561     PetscInt max_n,min_n;
1562     max_n = max_size_of_constraint;
1563     min_n = max_constraints;
1564     if (max_size_of_constraint < max_constraints) {
1565       min_n = max_size_of_constraint;
1566       max_n = max_constraints;
1567     }
1568     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1569 #if defined(PETSC_USE_COMPLEX)
1570     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1571 #endif
1572     /* now we evaluate the optimal workspace using query with lwork=-1 */
1573     lwork = -1;
1574     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1575     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1576     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1577     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1578 #if !defined(PETSC_USE_COMPLEX)
1579     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1580 #else
1581     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1582 #endif
1583     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1584     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1585 #endif /* on missing GESVD */
1586     /* Allocate optimal workspace */
1587     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1588     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1589   }
1590   /* Now we can loop on constraining sets */
1591   total_counts = 0;
1592   temp_indices[0] = 0;
1593   /* vertices */
1594   if (ISForVertices) {
1595     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1596     if (nnsp_has_cnst) { /* consider all vertices */
1597       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1598       for (i=0;i<n_vertices;i++) {
1599         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1600         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1601         total_counts++;
1602       }
1603     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1604       PetscBool used_vertex;
1605       for (i=0;i<n_vertices;i++) {
1606         used_vertex = PETSC_FALSE;
1607         k = 0;
1608         while (!used_vertex && k<nnsp_size) {
1609           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1610           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1611             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1612             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1613             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1614             total_counts++;
1615             used_vertex = PETSC_TRUE;
1616           }
1617           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1618           k++;
1619         }
1620       }
1621     }
1622     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1623     n_vertices = total_counts;
1624   }
1625 
1626   /* edges and faces */
1627   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1628     if (ncc<n_ISForEdges) {
1629       used_IS = &ISForEdges[ncc];
1630       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1631     } else {
1632       used_IS = &ISForFaces[ncc-n_ISForEdges];
1633       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1634     }
1635     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1636     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1637     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1638     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1639     /* change of basis should not be performed on local periodic nodes */
1640     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1641     if (nnsp_has_cnst) {
1642       PetscScalar quad_value;
1643       temp_constraints++;
1644       if (!pcbddc->use_nnsp_true) {
1645         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1646       } else {
1647         quad_value = 1.0;
1648       }
1649       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1650       for (j=0;j<size_of_constraint;j++) {
1651         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1652       }
1653       /* sort by global ordering if using lapack subroutines */
1654       if (!skip_lapack || pcbddc->use_qr_single) {
1655         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1656         for (j=0;j<size_of_constraint;j++) {
1657           permutation[j]=j;
1658         }
1659         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1660         for (j=0;j<size_of_constraint;j++) {
1661           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1662           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1663         }
1664         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1665         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1666       }
1667       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1668       total_counts++;
1669     }
1670     for (k=0;k<nnsp_size;k++) {
1671       PetscReal real_value;
1672       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1673       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1674       for (j=0;j<size_of_constraint;j++) {
1675         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1676       }
1677       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1678       /* check if array is null on the connected component */
1679       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1680       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1681       if (real_value > 0.0) { /* keep indices and values */
1682         /* sort by global ordering if using lapack subroutines */
1683         if (!skip_lapack || pcbddc->use_qr_single) {
1684           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1685           for (j=0;j<size_of_constraint;j++) {
1686             permutation[j]=j;
1687           }
1688           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1689           for (j=0;j<size_of_constraint;j++) {
1690             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1691             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1692           }
1693           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1694           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1695         }
1696         temp_constraints++;
1697         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1698         total_counts++;
1699       }
1700     }
1701     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1702     valid_constraints = temp_constraints;
1703     if (!pcbddc->use_nnsp_true && temp_constraints) {
1704       if (temp_constraints == 1) { /* just normalize the constraint */
1705         PetscScalar norm;
1706         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1707         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1708         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1709         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1710       } else { /* perform SVD */
1711         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1712 
1713 #if defined(PETSC_MISSING_LAPACK_GESVD)
1714         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1715            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1716            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1717               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1718               from that computed using LAPACKgesvd
1719            -> This is due to a different computation of eigenvectors in LAPACKheev
1720            -> The quality of the POD-computed basis will be the same */
1721         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1722         /* Store upper triangular part of correlation matrix */
1723         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1724         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1725         for (j=0;j<temp_constraints;j++) {
1726           for (k=0;k<j+1;k++) {
1727             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1728           }
1729         }
1730         /* compute eigenvalues and eigenvectors of correlation matrix */
1731         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1732         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1733 #if !defined(PETSC_USE_COMPLEX)
1734         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1735 #else
1736         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1737 #endif
1738         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1739         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1740         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1741         j = 0;
1742         while (j < temp_constraints && singular_vals[j] < tol) j++;
1743         total_counts = total_counts-j;
1744         valid_constraints = temp_constraints-j;
1745         /* scale and copy POD basis into used quadrature memory */
1746         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1747         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1748         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1749         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1750         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1751         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1752         if (j<temp_constraints) {
1753           PetscInt ii;
1754           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1755           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1756           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1757           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1758           for (k=0;k<temp_constraints-j;k++) {
1759             for (ii=0;ii<size_of_constraint;ii++) {
1760               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1761             }
1762           }
1763         }
1764 #else  /* on missing GESVD */
1765         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1766         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1767         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1768         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1769 #if !defined(PETSC_USE_COMPLEX)
1770         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1771 #else
1772         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1773 #endif
1774         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1775         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1776         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1777         k = temp_constraints;
1778         if (k > size_of_constraint) k = size_of_constraint;
1779         j = 0;
1780         while (j < k && singular_vals[k-j-1] < tol) j++;
1781         valid_constraints = k-j;
1782         total_counts = total_counts-temp_constraints+valid_constraints;
1783 #endif /* on missing GESVD */
1784       }
1785     }
1786     /* setting change_of_basis flag is safe now */
1787     if (boolforchange) {
1788       for (j=0;j<valid_constraints;j++) {
1789         PetscBTSet(change_basis,total_counts-j-1);
1790       }
1791     }
1792   }
1793   /* free index sets of faces, edges and vertices */
1794   for (i=0;i<n_ISForFaces;i++) {
1795     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1796   }
1797   if (n_ISForFaces) {
1798     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1799   }
1800   for (i=0;i<n_ISForEdges;i++) {
1801     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1802   }
1803   if (n_ISForEdges) {
1804     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1805   }
1806   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1807   /* map temp_indices_to_constraint in boundary numbering */
1808   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1809   if (i != temp_indices[total_counts]) {
1810     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1811   }
1812 
1813   /* free workspace */
1814   if (!skip_lapack || pcbddc->use_qr_single) {
1815     ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1816   }
1817   if (!skip_lapack) {
1818     ierr = PetscFree(work);CHKERRQ(ierr);
1819 #if defined(PETSC_USE_COMPLEX)
1820     ierr = PetscFree(rwork);CHKERRQ(ierr);
1821 #endif
1822     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1823 #if defined(PETSC_MISSING_LAPACK_GESVD)
1824     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1825     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1826 #endif
1827   }
1828   for (k=0;k<nnsp_size;k++) {
1829     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1830   }
1831   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1832 
1833   /* set quantities in pcbddc data structure and store previous primal size */
1834   /* n_vertices defines the number of subdomain corners in the primal space */
1835   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1836   olocal_primal_size = pcbddc->local_primal_size;
1837   pcbddc->local_primal_size = total_counts;
1838   pcbddc->n_vertices = n_vertices;
1839   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1840 
1841   /* Create constraint matrix */
1842   /* The constraint matrix is used to compute the l2g map of primal dofs */
1843   /* so we need to set it up properly either with or without change of basis */
1844   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1845   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1846   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1847   /* array to compute a local numbering of constraints : vertices first then constraints */
1848   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1849   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1850   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1851   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1852   /* auxiliary stuff for basis change */
1853   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1854   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1855 
1856   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1857   total_primal_vertices=0;
1858   for (i=0;i<pcbddc->local_primal_size;i++) {
1859     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1860     if (size_of_constraint == 1) {
1861       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1862       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1863       aux_primal_minloc[total_primal_vertices]=0;
1864       total_primal_vertices++;
1865     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1866       PetscInt min_loc,min_index;
1867       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1868       /* find first untouched local node */
1869       k = 0;
1870       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1871       min_index = global_indices[k];
1872       min_loc = k;
1873       /* search the minimum among global nodes already untouched on the cc */
1874       for (k=1;k<size_of_constraint;k++) {
1875         /* there can be more than one constraint on a single connected component */
1876         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1877           min_index = global_indices[k];
1878           min_loc = k;
1879         }
1880       }
1881       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1882       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1883       aux_primal_minloc[total_primal_vertices]=min_loc;
1884       total_primal_vertices++;
1885     }
1886   }
1887   /* determine if a QR strategy is needed for change of basis */
1888   qr_needed = PETSC_FALSE;
1889   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1890   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1891     if (PetscBTLookup(change_basis,i)) {
1892       if (!pcbddc->use_qr_single) {
1893         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1894         j = 0;
1895         for (k=0;k<size_of_constraint;k++) {
1896           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1897             j++;
1898           }
1899         }
1900         /* found more than one primal dof on the cc */
1901         if (j > 1) {
1902           PetscBTSet(qr_needed_idx,i);
1903           qr_needed = PETSC_TRUE;
1904         }
1905       } else {
1906         PetscBTSet(qr_needed_idx,i);
1907         qr_needed = PETSC_TRUE;
1908       }
1909     }
1910   }
1911   /* free workspace */
1912   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1913 
1914   /* permute indices in order to have a sorted set of vertices */
1915   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1916 
1917   /* nonzero structure of constraint matrix */
1918   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1919   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1920   j=total_primal_vertices;
1921   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1922     if (!PetscBTLookup(change_basis,i)) {
1923       nnz[j]=temp_indices[i+1]-temp_indices[i];
1924       j++;
1925     }
1926   }
1927   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1928   ierr = PetscFree(nnz);CHKERRQ(ierr);
1929   /* set values in constraint matrix */
1930   for (i=0;i<total_primal_vertices;i++) {
1931     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1932   }
1933   total_counts = total_primal_vertices;
1934   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1935     if (!PetscBTLookup(change_basis,i)) {
1936       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1937       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1938       total_counts++;
1939     }
1940   }
1941   /* assembling */
1942   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1943   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1944   /*
1945   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1946   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1947   */
1948   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1949   if (pcbddc->use_change_of_basis) {
1950     /* dual and primal dofs on a single cc */
1951     PetscInt     dual_dofs,primal_dofs;
1952     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1953     PetscInt     primal_counter;
1954     /* working stuff for GEQRF */
1955     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1956     PetscBLASInt lqr_work;
1957     /* working stuff for UNGQR */
1958     PetscScalar  *gqr_work,lgqr_work_t;
1959     PetscBLASInt lgqr_work;
1960     /* working stuff for TRTRS */
1961     PetscScalar  *trs_rhs;
1962     PetscBLASInt Blas_NRHS;
1963     /* pointers for values insertion into change of basis matrix */
1964     PetscInt     *start_rows,*start_cols;
1965     PetscScalar  *start_vals;
1966     /* working stuff for values insertion */
1967     PetscBT      is_primal;
1968     /* matrix sizes */
1969     PetscInt     global_size,local_size;
1970     /* work array for nonzeros */
1971     PetscScalar  *nnz_array;
1972     /* temporary change of basis */
1973     Mat          localChangeOfBasisMatrix;
1974     /* auxiliary work for global change of basis */
1975     Vec          nnz_vec;
1976     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
1977     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
1978     PetscScalar  *vals;
1979     PetscBool    done;
1980 
1981     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
1982     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
1983     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1984     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1985 
1986     /* nonzeros for local mat */
1987     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1988     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1989     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1990       if (PetscBTLookup(change_basis,i)) {
1991         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1992         if (PetscBTLookup(qr_needed_idx,i)) {
1993           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1994         } else {
1995           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1996           /* get local primal index on the cc */
1997           j = 0;
1998           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1999           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2000         }
2001       }
2002     }
2003     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2004     /* Set initial identity in the matrix */
2005     for (i=0;i<pcis->n_B;i++) {
2006       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2007     }
2008 
2009     if (pcbddc->dbg_flag) {
2010       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2011       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2012     }
2013 
2014 
2015     /* Now we loop on the constraints which need a change of basis */
2016     /*
2017        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2018        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2019 
2020        Basic blocks of change of basis matrix T computed by
2021 
2022           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2023 
2024             | 1        0   ...        0         s_1/S |
2025             | 0        1   ...        0         s_2/S |
2026             |              ...                        |
2027             | 0        ...            1     s_{n-1}/S |
2028             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2029 
2030             with S = \sum_{i=1}^n s_i^2
2031             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2032                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2033 
2034           - QR decomposition of constraints otherwise
2035     */
2036     if (qr_needed) {
2037       /* space to store Q */
2038       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2039       /* first we issue queries for optimal work */
2040       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2041       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2042       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2043       lqr_work = -1;
2044       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2045       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2046       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2047       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2048       lgqr_work = -1;
2049       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2050       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2051       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2052       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2053       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2054       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2055       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2056       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2057       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2058       /* array to store scaling factors for reflectors */
2059       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2060       /* array to store rhs and solution of triangular solver */
2061       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2062       /* allocating workspace for check */
2063       if (pcbddc->dbg_flag) {
2064         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2065       }
2066     }
2067     /* array to store whether a node is primal or not */
2068     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2069     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2070     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2071     if (i != total_primal_vertices) {
2072       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2073     }
2074     for (i=0;i<total_primal_vertices;i++) {
2075       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2076     }
2077     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2078 
2079     /* loop on constraints and see whether or not they need a change of basis and compute it */
2080     /* -> using implicit ordering contained in temp_indices data */
2081     total_counts = pcbddc->n_vertices;
2082     primal_counter = total_counts;
2083     while (total_counts<pcbddc->local_primal_size) {
2084       primal_dofs = 1;
2085       if (PetscBTLookup(change_basis,total_counts)) {
2086         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2087         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2088           primal_dofs++;
2089         }
2090         /* get constraint info */
2091         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2092         dual_dofs = size_of_constraint-primal_dofs;
2093 
2094         if (pcbddc->dbg_flag) {
2095           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2096         }
2097 
2098         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2099 
2100           /* copy quadrature constraints for change of basis check */
2101           if (pcbddc->dbg_flag) {
2102             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2103           }
2104           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2105           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2106 
2107           /* compute QR decomposition of constraints */
2108           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2109           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2110           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2111           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2112           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2113           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2114           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2115 
2116           /* explictly compute R^-T */
2117           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2118           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2119           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2120           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2121           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2122           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2123           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2124           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2125           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2126           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2127 
2128           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2129           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2130           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2131           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2132           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2133           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2134           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2135           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2136           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2137 
2138           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2139              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2140              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2141           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2142           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2143           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2144           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2145           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2146           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2147           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2148           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2149           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2150           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2151 
2152           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2153           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2154           /* insert cols for primal dofs */
2155           for (j=0;j<primal_dofs;j++) {
2156             start_vals = &qr_basis[j*size_of_constraint];
2157             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2158             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2159           }
2160           /* insert cols for dual dofs */
2161           for (j=0,k=0;j<dual_dofs;k++) {
2162             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2163               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2164               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2165               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2166               j++;
2167             }
2168           }
2169 
2170           /* check change of basis */
2171           if (pcbddc->dbg_flag) {
2172             PetscInt   ii,jj;
2173             PetscBool valid_qr=PETSC_TRUE;
2174             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2175             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2176             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2177             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2178             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2179             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2180             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2181             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2182             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2183             for (jj=0;jj<size_of_constraint;jj++) {
2184               for (ii=0;ii<primal_dofs;ii++) {
2185                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2186                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2187               }
2188             }
2189             if (!valid_qr) {
2190               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2191               for (jj=0;jj<size_of_constraint;jj++) {
2192                 for (ii=0;ii<primal_dofs;ii++) {
2193                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2194                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2195                   }
2196                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2197                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2198                   }
2199                 }
2200               }
2201             } else {
2202               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2203             }
2204           }
2205         } else { /* simple transformation block */
2206           PetscInt    row,col;
2207           PetscScalar val,norm;
2208 
2209           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2210           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2211           for (j=0;j<size_of_constraint;j++) {
2212             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2213             if (!PetscBTLookup(is_primal,row)) {
2214               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2215               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2216               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2217             } else {
2218               for (k=0;k<size_of_constraint;k++) {
2219                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2220                 if (row != col) {
2221                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2222                 } else {
2223                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2224                 }
2225                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2226               }
2227             }
2228           }
2229           if (pcbddc->dbg_flag) {
2230             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2231           }
2232         }
2233         /* increment primal counter */
2234         primal_counter += primal_dofs;
2235       } else {
2236         if (pcbddc->dbg_flag) {
2237           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2238         }
2239       }
2240       /* increment constraint counter total_counts */
2241       total_counts += primal_dofs;
2242     }
2243 
2244     /* free workspace */
2245     if (qr_needed) {
2246       if (pcbddc->dbg_flag) {
2247         ierr = PetscFree(work);CHKERRQ(ierr);
2248       }
2249       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2250       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2251       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2252       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2253       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2254     }
2255     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2256     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2257     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258 
2259     /* assembling of global change of variable */
2260     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2261     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2262     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2263     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2264     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2265     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2266 
2267     /* nonzeros (overestimated) */
2268     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2269     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2270     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2271     for (i=0;i<pcis->n;i++) {
2272       nnz_array[i] = 1.0;
2273       idxs_all[i] = i;
2274     }
2275     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2276     for (i=0;i<pcis->n_B;i++) {
2277       nnz_array[idxs_B[i]] = nnz[i];
2278     }
2279     if (pcis->n) {
2280       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2281     }
2282     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2283     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2284     ierr = PetscFree(nnz);CHKERRQ(ierr);
2285     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2286     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2287     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2288     for (i=0;i<local_size;i++) {
2289       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2290       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2291     }
2292     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2293     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2294     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2295     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2296 
2297     /* Set identity on dirichlet dofs */
2298     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2299     for (i=0;i<pcis->n-pcis->n_B;i++) {
2300       PetscScalar one=1.0;
2301       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2302     }
2303     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2304 
2305     /* Set values at interface dofs */
2306     done = PETSC_TRUE;
2307     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2308     if (!done) {
2309       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2310     }
2311     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2312     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2313     ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2314     for (i=0;i<nvtxs;i++) {
2315       PetscInt    row,*cols,ncols;
2316       PetscScalar *mat_vals;
2317 
2318       row = idxs_B[i];
2319       ncols = xadj[i+1]-xadj[i];
2320       cols = idxs_mapped+xadj[i];
2321       mat_vals = vals+xadj[i];
2322       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2323     }
2324     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2325     if (!done) {
2326       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2327     }
2328     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2329     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2330     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2331     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2332     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2333 
2334     /* check */
2335     if (pcbddc->dbg_flag) {
2336       PetscReal error;
2337       Vec       x,x_change;
2338 
2339       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2340       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2341       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2342       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2343       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2344       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2345       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2346       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2347       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2348       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2349       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2350       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2351       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2352       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2353       ierr = VecDestroy(&x);CHKERRQ(ierr);
2354       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2355     }
2356     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2357   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2358     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2359     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2360   }
2361 
2362   /* set up change of basis context */
2363   if (pcbddc->ChangeOfBasisMatrix) {
2364     PCBDDCChange_ctx change_ctx;
2365 
2366     if (!pcbddc->new_global_mat) {
2367       PetscInt global_size,local_size;
2368 
2369       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2370       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2371       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2372       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2373       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2374       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2375       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2376       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2377       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2378     } else {
2379       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2380       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2381       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2382     }
2383     if (!pcbddc->user_ChangeOfBasisMatrix) {
2384       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2385       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2386     } else {
2387       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2388       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2389     }
2390     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2391     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2392     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   }
2395 
2396   /* get indices in local ordering for vertices and constraints */
2397   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2398     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2399     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2400   }
2401   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2402   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2403   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2404   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2405   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2406   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2407   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2408   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2409   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2410   /* set quantities in PCBDDC data struct */
2411   pcbddc->n_actual_vertices = i;
2412   /* check if a new primal space has been introduced */
2413   pcbddc->new_primal_space_local = PETSC_TRUE;
2414   if (olocal_primal_size == pcbddc->local_primal_size) {
2415     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2416     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2417     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2418   }
2419   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2420   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2421 
2422   /* flush dbg viewer */
2423   if (pcbddc->dbg_flag) {
2424     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2425   }
2426 
2427   /* free workspace */
2428   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2429   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2430   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2431   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2432   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2433   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2439 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2440 {
2441   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2442   PC_IS       *pcis = (PC_IS*)pc->data;
2443   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2444   PetscInt    ierr,i,vertex_size;
2445   PetscViewer viewer=pcbddc->dbg_viewer;
2446 
2447   PetscFunctionBegin;
2448   /* Reset previously computed graph */
2449   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2450   /* Init local Graph struct */
2451   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2452 
2453   /* Check validity of the csr graph passed in by the user */
2454   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2455     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2456   }
2457 
2458   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2459   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2460     Mat       mat_adj;
2461     PetscInt  *xadj,*adjncy;
2462     PetscInt  nvtxs;
2463     PetscBool flg_row=PETSC_TRUE;
2464 
2465     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2466     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2467     if (!flg_row) {
2468       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2469     }
2470     if (pcbddc->use_local_adj) {
2471       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2472       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2473     } else { /* just compute subdomain's connected components */
2474       IS                     is_dummy;
2475       ISLocalToGlobalMapping l2gmap_dummy;
2476       PetscInt               j,sum;
2477       PetscInt               *cxadj,*cadjncy;
2478       const PetscInt         *idxs;
2479       PCBDDCGraph            graph;
2480       PetscBT                is_on_boundary;
2481 
2482       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2483       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2484       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2485       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2486       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2487       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2488       graph->xadj = xadj;
2489       graph->adjncy = adjncy;
2490       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2491       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2492 
2493       if (pcbddc->dbg_flag) {
2494         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2495         for (i=0;i<graph->ncc;i++) {
2496           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2497         }
2498         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2499       }
2500 
2501       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2502       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2503       for (i=0;i<pcis->n_B;i++) {
2504         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2505       }
2506       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2507 
2508       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2509       sum = 0;
2510       for (i=0;i<graph->ncc;i++) {
2511         PetscInt sizecc = 0;
2512         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2513           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2514             sizecc++;
2515           }
2516         }
2517         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2518           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2519             cxadj[graph->queue[j]] = sizecc;
2520           }
2521         }
2522         sum += sizecc*sizecc;
2523       }
2524       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2525       sum = 0;
2526       for (i=0;i<nvtxs;i++) {
2527         PetscInt temp = cxadj[i];
2528         cxadj[i] = sum;
2529         sum += temp;
2530       }
2531       cxadj[nvtxs] = sum;
2532       for (i=0;i<graph->ncc;i++) {
2533         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2534           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2535             PetscInt k,sizecc = 0;
2536             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2537               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2538                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2539                 sizecc++;
2540               }
2541             }
2542           }
2543         }
2544       }
2545       if (nvtxs) {
2546         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2547       } else {
2548         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2549         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2550       }
2551       graph->xadj = 0;
2552       graph->adjncy = 0;
2553       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2554       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2555     }
2556     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2557     if (!flg_row) {
2558       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2559     }
2560     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2561   }
2562 
2563   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2564   vertex_size = 1;
2565   if (pcbddc->user_provided_isfordofs) {
2566     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2567       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2568       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2569         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2570         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2571       }
2572       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2573       pcbddc->n_ISForDofs = 0;
2574       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2575     }
2576     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2577     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2578   } else {
2579     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2580       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2581       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2582       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2583         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2584       }
2585     }
2586   }
2587 
2588   /* Setup of Graph */
2589   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2590     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2591   }
2592   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2593     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2594   }
2595   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2596 
2597   /* Graph's connected components analysis */
2598   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2599 
2600   /* print some info to stdout */
2601   if (pcbddc->dbg_flag) {
2602     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2603   }
2604 
2605   /* mark topography has done */
2606   pcbddc->recompute_topography = PETSC_FALSE;
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 #undef __FUNCT__
2611 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2612 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2613 {
2614   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2615   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2616   PetscErrorCode ierr;
2617 
2618   PetscFunctionBegin;
2619   n = 0;
2620   vertices = 0;
2621   if (pcbddc->ConstraintMatrix) {
2622     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2623     for (i=0;i<local_primal_size;i++) {
2624       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2625       if (size_of_constraint == 1) n++;
2626       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2627     }
2628     if (vertices_idx) {
2629       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2630       n = 0;
2631       for (i=0;i<local_primal_size;i++) {
2632         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2633         if (size_of_constraint == 1) {
2634           vertices[n++]=row_cmat_indices[0];
2635         }
2636         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2637       }
2638     }
2639   }
2640   *n_vertices = n;
2641   if (vertices_idx) *vertices_idx = vertices;
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 #undef __FUNCT__
2646 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2647 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2648 {
2649   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2650   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2651   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2652   PetscBT        touched;
2653   PetscErrorCode ierr;
2654 
2655     /* This function assumes that the number of local constraints per connected component
2656        is not greater than the number of nodes defined for the connected component
2657        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2658   PetscFunctionBegin;
2659   n = 0;
2660   constraints_index = 0;
2661   if (pcbddc->ConstraintMatrix) {
2662     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2663     max_size_of_constraint = 0;
2664     for (i=0;i<local_primal_size;i++) {
2665       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2666       if (size_of_constraint > 1) {
2667         n++;
2668       }
2669       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2670       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2671     }
2672     if (constraints_idx) {
2673       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2674       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2675       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2676       n = 0;
2677       for (i=0;i<local_primal_size;i++) {
2678         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2679         if (size_of_constraint > 1) {
2680           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2681           /* find first untouched local node */
2682           j = 0;
2683           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2684           min_index = row_cmat_global_indices[j];
2685           min_loc = j;
2686           /* search the minimum among nodes not yet touched on the connected component
2687              since there can be more than one constraint on a single cc */
2688           for (j=1;j<size_of_constraint;j++) {
2689             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2690               min_index = row_cmat_global_indices[j];
2691               min_loc = j;
2692             }
2693           }
2694           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2695           constraints_index[n++] = row_cmat_indices[min_loc];
2696         }
2697         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2698       }
2699       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2700       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2701     }
2702   }
2703   *n_constraints = n;
2704   if (constraints_idx) *constraints_idx = constraints_index;
2705   PetscFunctionReturn(0);
2706 }
2707 
2708 #undef __FUNCT__
2709 #define __FUNCT__ "PCBDDCSubsetNumbering"
2710 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2711 {
2712   Vec            local_vec,global_vec;
2713   IS             seqis,paris;
2714   VecScatter     scatter_ctx;
2715   PetscScalar    *array;
2716   PetscInt       *temp_global_dofs;
2717   PetscScalar    globalsum;
2718   PetscInt       i,j,s;
2719   PetscInt       nlocals,first_index,old_index,max_local;
2720   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2721   PetscMPIInt    *dof_sizes,*dof_displs;
2722   PetscBool      first_found;
2723   PetscErrorCode ierr;
2724 
2725   PetscFunctionBegin;
2726   /* mpi buffers */
2727   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2728   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2729   j = ( !rank_prec_comm ? size_prec_comm : 0);
2730   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2731   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2732   /* get maximum size of subset */
2733   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2734   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2735   max_local = 0;
2736   for (i=0;i<n_local_dofs;i++) {
2737     if (max_local < temp_global_dofs[i] ) {
2738       max_local = temp_global_dofs[i];
2739     }
2740   }
2741   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2742   max_global++;
2743   max_local = 0;
2744   for (i=0;i<n_local_dofs;i++) {
2745     if (max_local < local_dofs[i] ) {
2746       max_local = local_dofs[i];
2747     }
2748   }
2749   max_local++;
2750   /* allocate workspace */
2751   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2752   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2753   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2754   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2755   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2756   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2757   /* create scatter */
2758   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2759   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2760   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2761   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2762   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2763   /* init array */
2764   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2765   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2766   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2767   if (local_dofs_mult) {
2768     for (i=0;i<n_local_dofs;i++) {
2769       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2770     }
2771   } else {
2772     for (i=0;i<n_local_dofs;i++) {
2773       array[local_dofs[i]]=1.0;
2774     }
2775   }
2776   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2777   /* scatter into global vec and get total number of global dofs */
2778   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2779   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2780   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2781   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2782   /* Fill global_vec with cumulative function for global numbering */
2783   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2784   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2785   nlocals = 0;
2786   first_index = -1;
2787   first_found = PETSC_FALSE;
2788   for (i=0;i<s;i++) {
2789     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2790       first_found = PETSC_TRUE;
2791       first_index = i;
2792     }
2793     nlocals += (PetscInt)PetscRealPart(array[i]);
2794   }
2795   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2796   if (!rank_prec_comm) {
2797     dof_displs[0]=0;
2798     for (i=1;i<size_prec_comm;i++) {
2799       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2800     }
2801   }
2802   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2803   if (first_found) {
2804     array[first_index] += (PetscScalar)nlocals;
2805     old_index = first_index;
2806     for (i=first_index+1;i<s;i++) {
2807       if (PetscRealPart(array[i]) > 0.1) {
2808         array[i] += array[old_index];
2809         old_index = i;
2810       }
2811     }
2812   }
2813   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2814   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2815   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2816   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2817   /* get global ordering of local dofs */
2818   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2819   if (local_dofs_mult) {
2820     for (i=0;i<n_local_dofs;i++) {
2821       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2822     }
2823   } else {
2824     for (i=0;i<n_local_dofs;i++) {
2825       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2826     }
2827   }
2828   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2829   /* free workspace */
2830   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2831   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2832   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2833   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2834   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2835   /* return pointer to global ordering of local dofs */
2836   *global_numbering_subset = temp_global_dofs;
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2842 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2843 {
2844   PetscInt       i,j;
2845   PetscScalar    *alphas;
2846   PetscErrorCode ierr;
2847 
2848   PetscFunctionBegin;
2849   /* this implements stabilized Gram-Schmidt */
2850   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2851   for (i=0;i<n;i++) {
2852     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2853     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2854     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2855   }
2856   ierr = PetscFree(alphas);CHKERRQ(ierr);
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatISGetSubassemblingPattern"
2862 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2863 {
2864   Mat             subdomain_adj;
2865   IS              new_ranks,ranks_send_to;
2866   MatPartitioning partitioner;
2867   Mat_IS          *matis;
2868   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2869   PetscInt        prank;
2870   PetscMPIInt     size,rank,color;
2871   PetscInt        *xadj,*adjncy,*oldranks;
2872   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2873   PetscInt        i,local_size,threshold=0;
2874   PetscErrorCode  ierr;
2875   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2876   PetscSubcomm    subcomm;
2877 
2878   PetscFunctionBegin;
2879   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2880   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2881   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2882 
2883   /* Get info on mapping */
2884   matis = (Mat_IS*)(mat->data);
2885   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2886   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2887 
2888   /* build local CSR graph of subdomains' connectivity */
2889   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2890   xadj[0] = 0;
2891   xadj[1] = PetscMax(n_neighs-1,0);
2892   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2893   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2894 
2895   if (threshold) {
2896     PetscInt xadj_count = 0;
2897     for (i=1;i<n_neighs;i++) {
2898       if (n_shared[i] > threshold) {
2899         adjncy[xadj_count] = neighs[i];
2900         adjncy_wgt[xadj_count] = n_shared[i];
2901         xadj_count++;
2902       }
2903     }
2904     xadj[1] = xadj_count;
2905   } else {
2906     if (xadj[1]) {
2907       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2908       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2909     }
2910   }
2911   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2912   if (use_square) {
2913     for (i=0;i<xadj[1];i++) {
2914       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2915     }
2916   }
2917   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2918 
2919   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
2920 
2921   /*
2922     Restrict work on active processes only.
2923   */
2924   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2925   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2926   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2927   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2928   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2929   if (color) {
2930     ierr = PetscFree(xadj);CHKERRQ(ierr);
2931     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2932     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2933   } else {
2934     PetscInt coarsening_ratio;
2935     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2936     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2937     prank = rank;
2938     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2939     /*
2940     for (i=0;i<size;i++) {
2941       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2942     }
2943     */
2944     for (i=0;i<xadj[1];i++) {
2945       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2946     }
2947     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2948     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2949     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2950 
2951     /* Partition */
2952     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2953     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2954     if (use_vwgt) {
2955       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
2956       v_wgt[0] = local_size;
2957       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2958     }
2959     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2960     coarsening_ratio = size/n_subdomains;
2961     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2962     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2963     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2964     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2965 
2966     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2967     if (contiguous) {
2968       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2969     } else {
2970       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2971     }
2972     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2973     /* clean up */
2974     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2975     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2976     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2977     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2978   }
2979   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2980 
2981   /* assemble parallel IS for sends */
2982   i = 1;
2983   if (color) i=0;
2984   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2985 
2986   /* get back IS */
2987   *is_sends = ranks_send_to;
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2992 
2993 #undef __FUNCT__
2994 #define __FUNCT__ "MatISSubassemble"
2995 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2996 {
2997   Mat                    local_mat;
2998   Mat_IS                 *matis;
2999   IS                     is_sends_internal;
3000   PetscInt               rows,cols,new_local_rows;
3001   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3002   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3003   ISLocalToGlobalMapping l2gmap;
3004   PetscInt*              l2gmap_indices;
3005   const PetscInt*        is_indices;
3006   MatType                new_local_type;
3007   /* buffers */
3008   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3009   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3010   PetscInt               *recv_buffer_idxs_local;
3011   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3012   /* MPI */
3013   MPI_Comm               comm,comm_n;
3014   PetscSubcomm           subcomm;
3015   PetscMPIInt            n_sends,n_recvs,commsize;
3016   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3017   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3018   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3019   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3020   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3021   PetscErrorCode         ierr;
3022 
3023   PetscFunctionBegin;
3024   /* TODO: add missing checks */
3025   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3026   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3027   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3028   PetscValidLogicalCollectiveInt(mat,nis,7);
3029   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3030   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3031   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3032   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3033   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3034   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3035   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3036   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3037     PetscInt mrows,mcols,mnrows,mncols;
3038     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3039     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3040     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3041     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3042     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3043     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3044   }
3045   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3046   PetscValidLogicalCollectiveInt(mat,bs,0);
3047   /* prepare IS for sending if not provided */
3048   if (!is_sends) {
3049     PetscBool pcontig = PETSC_TRUE;
3050     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3051     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3052   } else {
3053     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3054     is_sends_internal = is_sends;
3055   }
3056 
3057   /* get pointer of MATIS data */
3058   matis = (Mat_IS*)mat->data;
3059 
3060   /* get comm */
3061   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3062 
3063   /* compute number of sends */
3064   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3065   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3066 
3067   /* compute number of receives */
3068   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3069   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3070   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3071   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3072   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3073   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3074   ierr = PetscFree(iflags);CHKERRQ(ierr);
3075 
3076   /* restrict comm if requested */
3077   subcomm = 0;
3078   destroy_mat = PETSC_FALSE;
3079   if (restrict_comm) {
3080     PetscMPIInt color,rank,subcommsize;
3081     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3082     color = 0;
3083     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3084     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3085     subcommsize = commsize - subcommsize;
3086     /* check if reuse has been requested */
3087     if (reuse == MAT_REUSE_MATRIX) {
3088       if (*mat_n) {
3089         PetscMPIInt subcommsize2;
3090         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3091         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3092         comm_n = PetscObjectComm((PetscObject)*mat_n);
3093       } else {
3094         comm_n = PETSC_COMM_SELF;
3095       }
3096     } else { /* MAT_INITIAL_MATRIX */
3097       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3098       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3099       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3100       comm_n = subcomm->comm;
3101     }
3102     /* flag to destroy *mat_n if not significative */
3103     if (color) destroy_mat = PETSC_TRUE;
3104   } else {
3105     comm_n = comm;
3106   }
3107 
3108   /* prepare send/receive buffers */
3109   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3110   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3111   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3112   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3113   if (nis) {
3114     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3115   }
3116 
3117   /* Get data from local matrices */
3118   if (!isdense) {
3119     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3120     /* TODO: See below some guidelines on how to prepare the local buffers */
3121     /*
3122        send_buffer_vals should contain the raw values of the local matrix
3123        send_buffer_idxs should contain:
3124        - MatType_PRIVATE type
3125        - PetscInt        size_of_l2gmap
3126        - PetscInt        global_row_indices[size_of_l2gmap]
3127        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3128     */
3129   } else {
3130     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3131     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3132     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3133     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3134     send_buffer_idxs[1] = i;
3135     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3136     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3137     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3138     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3139     for (i=0;i<n_sends;i++) {
3140       ilengths_vals[is_indices[i]] = len*len;
3141       ilengths_idxs[is_indices[i]] = len+2;
3142     }
3143   }
3144   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3145   /* additional is (if any) */
3146   if (nis) {
3147     PetscMPIInt psum;
3148     PetscInt j;
3149     for (j=0,psum=0;j<nis;j++) {
3150       PetscInt plen;
3151       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3152       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3153       psum += len+1; /* indices + lenght */
3154     }
3155     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3156     for (j=0,psum=0;j<nis;j++) {
3157       PetscInt plen;
3158       const PetscInt *is_array_idxs;
3159       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3160       send_buffer_idxs_is[psum] = plen;
3161       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3162       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3163       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3164       psum += plen+1; /* indices + lenght */
3165     }
3166     for (i=0;i<n_sends;i++) {
3167       ilengths_idxs_is[is_indices[i]] = psum;
3168     }
3169     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3170   }
3171 
3172   buf_size_idxs = 0;
3173   buf_size_vals = 0;
3174   buf_size_idxs_is = 0;
3175   for (i=0;i<n_recvs;i++) {
3176     buf_size_idxs += (PetscInt)olengths_idxs[i];
3177     buf_size_vals += (PetscInt)olengths_vals[i];
3178     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3179   }
3180   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3181   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3182   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3183 
3184   /* get new tags for clean communications */
3185   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3186   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3187   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3188 
3189   /* allocate for requests */
3190   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3191   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3192   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3193   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3194   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3195   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3196 
3197   /* communications */
3198   ptr_idxs = recv_buffer_idxs;
3199   ptr_vals = recv_buffer_vals;
3200   ptr_idxs_is = recv_buffer_idxs_is;
3201   for (i=0;i<n_recvs;i++) {
3202     source_dest = onodes[i];
3203     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3204     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3205     ptr_idxs += olengths_idxs[i];
3206     ptr_vals += olengths_vals[i];
3207     if (nis) {
3208       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3209       ptr_idxs_is += olengths_idxs_is[i];
3210     }
3211   }
3212   for (i=0;i<n_sends;i++) {
3213     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3214     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3215     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3216     if (nis) {
3217       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3218     }
3219   }
3220   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3221   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3222 
3223   /* assemble new l2g map */
3224   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3225   ptr_idxs = recv_buffer_idxs;
3226   new_local_rows = 0;
3227   for (i=0;i<n_recvs;i++) {
3228     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3229     ptr_idxs += olengths_idxs[i];
3230   }
3231   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3232   ptr_idxs = recv_buffer_idxs;
3233   new_local_rows = 0;
3234   for (i=0;i<n_recvs;i++) {
3235     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3236     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3237     ptr_idxs += olengths_idxs[i];
3238   }
3239   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3240   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3241   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3242 
3243   /* infer new local matrix type from received local matrices type */
3244   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3245   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3246   if (n_recvs) {
3247     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3248     ptr_idxs = recv_buffer_idxs;
3249     for (i=0;i<n_recvs;i++) {
3250       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3251         new_local_type_private = MATAIJ_PRIVATE;
3252         break;
3253       }
3254       ptr_idxs += olengths_idxs[i];
3255     }
3256     switch (new_local_type_private) {
3257       case MATDENSE_PRIVATE:
3258         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3259           new_local_type = MATSEQAIJ;
3260           bs = 1;
3261         } else { /* if I receive only 1 dense matrix */
3262           new_local_type = MATSEQDENSE;
3263           bs = 1;
3264         }
3265         break;
3266       case MATAIJ_PRIVATE:
3267         new_local_type = MATSEQAIJ;
3268         bs = 1;
3269         break;
3270       case MATBAIJ_PRIVATE:
3271         new_local_type = MATSEQBAIJ;
3272         break;
3273       case MATSBAIJ_PRIVATE:
3274         new_local_type = MATSEQSBAIJ;
3275         break;
3276       default:
3277         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3278         break;
3279     }
3280   } else { /* by default, new_local_type is seqdense */
3281     new_local_type = MATSEQDENSE;
3282     bs = 1;
3283   }
3284 
3285   /* create MATIS object if needed */
3286   if (reuse == MAT_INITIAL_MATRIX) {
3287     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3288     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3289   } else {
3290     /* it also destroys the local matrices */
3291     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3292   }
3293   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3294   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3295 
3296   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3297 
3298   /* Global to local map of received indices */
3299   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3300   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3301   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3302 
3303   /* restore attributes -> type of incoming data and its size */
3304   buf_size_idxs = 0;
3305   for (i=0;i<n_recvs;i++) {
3306     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3307     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3308     buf_size_idxs += (PetscInt)olengths_idxs[i];
3309   }
3310   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3311 
3312   /* set preallocation */
3313   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3314   if (!newisdense) {
3315     PetscInt *new_local_nnz=0;
3316 
3317     ptr_vals = recv_buffer_vals;
3318     ptr_idxs = recv_buffer_idxs_local;
3319     if (n_recvs) {
3320       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3321     }
3322     for (i=0;i<n_recvs;i++) {
3323       PetscInt j;
3324       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3325         for (j=0;j<*(ptr_idxs+1);j++) {
3326           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3327         }
3328       } else {
3329         /* TODO */
3330       }
3331       ptr_idxs += olengths_idxs[i];
3332     }
3333     if (new_local_nnz) {
3334       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3335       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3336       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3337       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3338       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3339       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3340     } else {
3341       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3342     }
3343     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3344   } else {
3345     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3346   }
3347 
3348   /* set values */
3349   ptr_vals = recv_buffer_vals;
3350   ptr_idxs = recv_buffer_idxs_local;
3351   for (i=0;i<n_recvs;i++) {
3352     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3353       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3354       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3355       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3356       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3357       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3358     } else {
3359       /* TODO */
3360     }
3361     ptr_idxs += olengths_idxs[i];
3362     ptr_vals += olengths_vals[i];
3363   }
3364   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3365   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3366   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3367   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3368   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3369   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3370 
3371 #if 0
3372   if (!restrict_comm) { /* check */
3373     Vec       lvec,rvec;
3374     PetscReal infty_error;
3375 
3376     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3377     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3378     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3379     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3380     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3381     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3382     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3383     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3384     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3385   }
3386 #endif
3387 
3388   /* assemble new additional is (if any) */
3389   if (nis) {
3390     PetscInt **temp_idxs,*count_is,j,psum;
3391 
3392     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3393     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3394     ptr_idxs = recv_buffer_idxs_is;
3395     psum = 0;
3396     for (i=0;i<n_recvs;i++) {
3397       for (j=0;j<nis;j++) {
3398         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3399         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3400         psum += plen;
3401         ptr_idxs += plen+1; /* shift pointer to received data */
3402       }
3403     }
3404     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3405     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3406     for (i=1;i<nis;i++) {
3407       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3408     }
3409     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3410     ptr_idxs = recv_buffer_idxs_is;
3411     for (i=0;i<n_recvs;i++) {
3412       for (j=0;j<nis;j++) {
3413         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3414         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3415         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3416         ptr_idxs += plen+1; /* shift pointer to received data */
3417       }
3418     }
3419     for (i=0;i<nis;i++) {
3420       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3421       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3422       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3423     }
3424     ierr = PetscFree(count_is);CHKERRQ(ierr);
3425     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3426     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3427   }
3428   /* free workspace */
3429   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3430   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3431   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3432   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3433   if (isdense) {
3434     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3435     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3436   } else {
3437     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3438   }
3439   if (nis) {
3440     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3441     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3442   }
3443   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3444   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3445   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3446   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3447   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3448   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3449   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3450   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3451   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3452   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3453   ierr = PetscFree(onodes);CHKERRQ(ierr);
3454   if (nis) {
3455     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3456     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3457     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3458   }
3459   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3460   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3461     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3462     for (i=0;i<nis;i++) {
3463       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3464     }
3465   }
3466   PetscFunctionReturn(0);
3467 }
3468 
3469 /* temporary hack into ksp private data structure */
3470 #include <petsc-private/kspimpl.h>
3471 
3472 #undef __FUNCT__
3473 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3474 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3475 {
3476   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3477   PC_IS                  *pcis = (PC_IS*)pc->data;
3478   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3479   MatNullSpace           CoarseNullSpace=NULL;
3480   ISLocalToGlobalMapping coarse_islg;
3481   IS                     coarse_is,*isarray;
3482   PetscInt               i,im_active=-1,active_procs=-1;
3483   PetscInt               nis,nisdofs,nisneu;
3484   PC                     pc_temp;
3485   PCType                 coarse_pc_type;
3486   KSPType                coarse_ksp_type;
3487   PetscBool              multilevel_requested,multilevel_allowed;
3488   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3489   Mat                    t_coarse_mat_is;
3490   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3491   PetscMPIInt            all_procs;
3492   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3493   PetscBool              compute_vecs = PETSC_FALSE;
3494   PetscScalar            *array;
3495   PetscErrorCode         ierr;
3496 
3497   PetscFunctionBegin;
3498   /* Assign global numbering to coarse dofs */
3499   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3500     compute_vecs = PETSC_TRUE;
3501     PetscInt ocoarse_size;
3502     ocoarse_size = pcbddc->coarse_size;
3503     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3504     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3505     /* see if we can avoid some work */
3506     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3507       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3508         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3509         coarse_reuse = PETSC_FALSE;
3510       } else { /* we can safely reuse already computed coarse matrix */
3511         coarse_reuse = PETSC_TRUE;
3512       }
3513     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3514       coarse_reuse = PETSC_FALSE;
3515     }
3516     /* reset any subassembling information */
3517     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3518     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3519   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3520     coarse_reuse = PETSC_TRUE;
3521   }
3522 
3523   /* count "active" (i.e. with positive local size) and "void" processes */
3524   im_active = !!(pcis->n);
3525   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3526   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3527   void_procs = all_procs-active_procs;
3528   csin_type_simple = PETSC_TRUE;
3529   redist = PETSC_FALSE;
3530   if (pcbddc->current_level && void_procs) {
3531     csin_ml = PETSC_TRUE;
3532     ncoarse_ml = void_procs;
3533     csin_ds = PETSC_TRUE;
3534     ncoarse_ds = void_procs;
3535   } else {
3536     csin_ml = PETSC_FALSE;
3537     ncoarse_ml = all_procs;
3538     if (void_procs) {
3539       csin_ds = PETSC_TRUE;
3540       ncoarse_ds = void_procs;
3541       csin_type_simple = PETSC_FALSE;
3542     } else {
3543       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3544         csin_ds = PETSC_TRUE;
3545         ncoarse_ds = pcbddc->redistribute_coarse;
3546         redist = PETSC_TRUE;
3547       } else {
3548         csin_ds = PETSC_FALSE;
3549         ncoarse_ds = all_procs;
3550       }
3551     }
3552   }
3553 
3554   /*
3555     test if we can go multilevel: three conditions must be satisfied:
3556     - we have not exceeded the number of levels requested
3557     - we can actually subassemble the active processes
3558     - we can find a suitable number of MPI processes where we can place the subassembled problem
3559   */
3560   multilevel_allowed = PETSC_FALSE;
3561   multilevel_requested = PETSC_FALSE;
3562   if (pcbddc->current_level < pcbddc->max_levels) {
3563     multilevel_requested = PETSC_TRUE;
3564     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3565       multilevel_allowed = PETSC_FALSE;
3566     } else {
3567       multilevel_allowed = PETSC_TRUE;
3568     }
3569   }
3570   /* determine number of process partecipating to coarse solver */
3571   if (multilevel_allowed) {
3572     ncoarse = ncoarse_ml;
3573     csin = csin_ml;
3574   } else {
3575     ncoarse = ncoarse_ds;
3576     csin = csin_ds;
3577   }
3578 
3579   /* creates temporary l2gmap and IS for coarse indexes */
3580   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3581   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3582 
3583   /* creates temporary MATIS object for coarse matrix */
3584   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3585   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3586   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3587   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3588 #if 0
3589   {
3590     PetscViewer viewer;
3591     char filename[256];
3592     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3593     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3594     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3595     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3596     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3597   }
3598 #endif
3599   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3600   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3601   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3602   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3603   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3604 
3605   /* compute dofs splitting and neumann boundaries for coarse dofs */
3606   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3607     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3608     const PetscInt         *idxs;
3609     ISLocalToGlobalMapping tmap;
3610 
3611     /* create map between primal indices (in local representative ordering) and local primal numbering */
3612     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3613     /* allocate space for temporary storage */
3614     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3615     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3616     /* allocate for IS array */
3617     nisdofs = pcbddc->n_ISForDofsLocal;
3618     nisneu = !!pcbddc->NeumannBoundariesLocal;
3619     nis = nisdofs + nisneu;
3620     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3621     /* dofs splitting */
3622     for (i=0;i<nisdofs;i++) {
3623       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3624       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3625       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3626       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3627       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3628       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3629       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3630       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3631     }
3632     /* neumann boundaries */
3633     if (pcbddc->NeumannBoundariesLocal) {
3634       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3635       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3636       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3637       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3638       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3639       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3640       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3641       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3642     }
3643     /* free memory */
3644     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3645     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3646     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3647   } else {
3648     nis = 0;
3649     nisdofs = 0;
3650     nisneu = 0;
3651     isarray = NULL;
3652   }
3653   /* destroy no longer needed map */
3654   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3655 
3656   /* restrict on coarse candidates (if needed) */
3657   coarse_mat_is = NULL;
3658   if (csin) {
3659     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3660       if (redist) {
3661         PetscMPIInt rank;
3662         PetscInt spc,n_spc_p1,dest[1];
3663 
3664         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3665         spc = all_procs/pcbddc->redistribute_coarse;
3666         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3667         if (rank > n_spc_p1*(spc+1)-1) {
3668           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3669         } else {
3670           dest[0] = rank/(spc+1);
3671         }
3672         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3673       } else {
3674         PetscInt j,tissize,*nisindices;
3675         PetscInt *coarse_candidates;
3676         const PetscInt* tisindices;
3677         /* get coarse candidates' ranks in pc communicator */
3678         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3679         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3680         for (i=0,j=0;i<all_procs;i++) {
3681           if (!coarse_candidates[i]) {
3682             coarse_candidates[j]=i;
3683             j++;
3684           }
3685         }
3686         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3687         /* get a suitable subassembling pattern */
3688         if (csin_type_simple) {
3689           PetscMPIInt rank;
3690           PetscInt    issize,isidx;
3691           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3692           if (im_active) {
3693             issize = 1;
3694             isidx = (PetscInt)rank;
3695           } else {
3696             issize = 0;
3697             isidx = -1;
3698           }
3699           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3700         } else {
3701           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3702         }
3703         if (pcbddc->dbg_flag) {
3704           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3705           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3706           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3707           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3708           for (i=0;i<j;i++) {
3709             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3710           }
3711           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3712           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3713         }
3714         /* shift the pattern on coarse candidates */
3715         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3716         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3717         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3718         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3719         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3720         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3721         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3722       }
3723     }
3724     if (pcbddc->dbg_flag) {
3725       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3726       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3727       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3728       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3729     }
3730     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3731     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3732   } else {
3733     if (pcbddc->dbg_flag) {
3734       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3735       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3736       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3737     }
3738     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3739     coarse_mat_is = t_coarse_mat_is;
3740   }
3741 
3742   /* create local to global scatters for coarse problem */
3743   if (compute_vecs) {
3744     PetscInt lrows;
3745     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3746     if (coarse_mat_is) {
3747       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3748     } else {
3749       lrows = 0;
3750     }
3751     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3752     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3753     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3754     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3755     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3756   }
3757   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3758   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3759 
3760   /* set defaults for coarse KSP and PC */
3761   if (multilevel_allowed) {
3762     coarse_ksp_type = KSPRICHARDSON;
3763     coarse_pc_type = PCBDDC;
3764   } else {
3765     coarse_ksp_type = KSPPREONLY;
3766     coarse_pc_type = PCREDUNDANT;
3767   }
3768 
3769   /* print some info if requested */
3770   if (pcbddc->dbg_flag) {
3771     if (!multilevel_allowed) {
3772       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3773       if (multilevel_requested) {
3774         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3775       } else if (pcbddc->max_levels) {
3776         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3777       }
3778       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3779     }
3780   }
3781 
3782   /* create the coarse KSP object only once with defaults */
3783   if (coarse_mat_is) {
3784     MatReuse coarse_mat_reuse;
3785     PetscViewer dbg_viewer = NULL;
3786     if (pcbddc->dbg_flag) {
3787       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3788       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3789     }
3790     if (!pcbddc->coarse_ksp) {
3791       char prefix[256],str_level[16];
3792       size_t len;
3793       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3794       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3795       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3796       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3797       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3798       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3799       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3800       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3801       /* prefix */
3802       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3803       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3804       if (!pcbddc->current_level) {
3805         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3806         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3807       } else {
3808         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3809         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3810         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3811         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3812         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3813         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3814       }
3815       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3816       /* allow user customization */
3817       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3818       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3819     }
3820 
3821     /* get some info after set from options */
3822     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3823     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3824     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3825     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3826     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3827       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3828       isbddc = PETSC_FALSE;
3829     }
3830     if (isredundant) {
3831       KSP inner_ksp;
3832       PC inner_pc;
3833       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3834       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3835       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3836     }
3837 
3838     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3839     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3840     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3841     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3842     if (nisdofs) {
3843       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3844       for (i=0;i<nisdofs;i++) {
3845         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3846       }
3847     }
3848     if (nisneu) {
3849       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3850       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3851     }
3852 
3853     /* assemble coarse matrix */
3854     if (coarse_reuse) {
3855       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3856       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3857       coarse_mat_reuse = MAT_REUSE_MATRIX;
3858     } else {
3859       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3860     }
3861     if (isbddc || isnn) {
3862       if (pcbddc->coarsening_ratio > 1) {
3863         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3864           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3865           if (pcbddc->dbg_flag) {
3866             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3867             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3868             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3869             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3870           }
3871         }
3872         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3873       } else {
3874         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3875         coarse_mat = coarse_mat_is;
3876       }
3877     } else {
3878       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3879     }
3880     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3881 
3882     /* propagate symmetry info to coarse matrix */
3883     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3884     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3885 
3886     /* set operators */
3887     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3888     if (pcbddc->dbg_flag) {
3889       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3890     }
3891   } else { /* processes non partecipating to coarse solver (if any) */
3892     coarse_mat = 0;
3893   }
3894   ierr = PetscFree(isarray);CHKERRQ(ierr);
3895 #if 0
3896   {
3897     PetscViewer viewer;
3898     char filename[256];
3899     sprintf(filename,"coarse_mat.m");
3900     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3901     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3902     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3903     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3904   }
3905 #endif
3906 
3907   /* Compute coarse null space (special handling by BDDC only) */
3908   if (pcbddc->NullSpace) {
3909     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3910   }
3911 
3912   if (pcbddc->coarse_ksp) {
3913     Vec crhs,csol;
3914     PetscBool ispreonly;
3915     if (CoarseNullSpace) {
3916       if (isbddc) {
3917         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3918       } else {
3919         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3920       }
3921     }
3922     /* setup coarse ksp */
3923     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3924     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3925     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3926     /* hack */
3927     if (!csol) {
3928       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3929     }
3930     if (!crhs) {
3931       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3932     }
3933     /* Check coarse problem if in debug mode or if solving with an iterative method */
3934     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3935     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3936       KSP       check_ksp;
3937       KSPType   check_ksp_type;
3938       PC        check_pc;
3939       Vec       check_vec,coarse_vec;
3940       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3941       PetscInt  its;
3942       PetscBool compute_eigs;
3943       PetscReal *eigs_r,*eigs_c;
3944       PetscInt  neigs;
3945       const char *prefix;
3946 
3947       /* Create ksp object suitable for estimation of extreme eigenvalues */
3948       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3949       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3950       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3951       if (ispreonly) {
3952         check_ksp_type = KSPPREONLY;
3953         compute_eigs = PETSC_FALSE;
3954       } else {
3955         check_ksp_type = KSPGMRES;
3956         compute_eigs = PETSC_TRUE;
3957       }
3958       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3959       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3960       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3961       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3962       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3963       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3964       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3965       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3966       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3967       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3968       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3969       /* create random vec */
3970       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3971       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3972       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3973       if (CoarseNullSpace) {
3974         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3975       }
3976       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3977       /* solve coarse problem */
3978       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3979       if (CoarseNullSpace) {
3980         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3981       }
3982       /* set eigenvalue estimation if preonly has not been requested */
3983       if (compute_eigs) {
3984         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
3985         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
3986         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3987         lambda_max = eigs_r[neigs-1];
3988         lambda_min = eigs_r[0];
3989         if (pcbddc->use_coarse_estimates) {
3990           if (lambda_max>lambda_min) {
3991             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3992             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3993           }
3994         }
3995       }
3996 
3997       /* check coarse problem residual error */
3998       if (pcbddc->dbg_flag) {
3999         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4000         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4001         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4002         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4003         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4004         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4005         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4006         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4007         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4008         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4009         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4010         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4011         if (compute_eigs) {
4012           PetscReal lambda_max_s,lambda_min_s;
4013           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4014           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4015           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4016           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4017           for (i=0;i<neigs;i++) {
4018             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4019           }
4020         }
4021         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4022         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4023       }
4024       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4025       if (compute_eigs) {
4026         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4027         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4028       }
4029     }
4030   }
4031   /* print additional info */
4032   if (pcbddc->dbg_flag) {
4033     /* waits until all processes reaches this point */
4034     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4035     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4036     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4037   }
4038 
4039   /* free memory */
4040   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4041   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4042   PetscFunctionReturn(0);
4043 }
4044 
4045 #undef __FUNCT__
4046 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4047 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4048 {
4049   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4050   PC_IS*         pcis = (PC_IS*)pc->data;
4051   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4052   PetscInt       i,coarse_size;
4053   PetscInt       *local_primal_indices;
4054   PetscErrorCode ierr;
4055 
4056   PetscFunctionBegin;
4057   /* Compute global number of coarse dofs */
4058   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4059     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4060   }
4061   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4062 
4063   /* check numbering */
4064   if (pcbddc->dbg_flag) {
4065     PetscScalar coarsesum,*array;
4066     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4067 
4068     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4069     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4070     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4071     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4072     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4073     for (i=0;i<pcbddc->local_primal_size;i++) {
4074       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4075     }
4076     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4077     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4078     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4079     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4080     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4081     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4082     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4083     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4084     for (i=0;i<pcis->n;i++) {
4085       if (array[i] == 1.0) {
4086         set_error = PETSC_TRUE;
4087         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4088       }
4089     }
4090     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4091     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4092     for (i=0;i<pcis->n;i++) {
4093       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4094     }
4095     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4096     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4097     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4098     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4099     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4100     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4101     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4102       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4103       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4104       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4105       for (i=0;i<pcbddc->local_primal_size;i++) {
4106         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4107       }
4108       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4109     }
4110     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4111     if (set_error_reduced) {
4112       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4113     }
4114   }
4115   /* get back data */
4116   *coarse_size_n = coarse_size;
4117   *local_primal_indices_n = local_primal_indices;
4118   PetscFunctionReturn(0);
4119 }
4120 
4121 #undef __FUNCT__
4122 #define __FUNCT__ "PCBDDCGlobalToLocal"
4123 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4124 {
4125   IS             localis_t;
4126   PetscInt       i,lsize,*idxs,n;
4127   PetscScalar    *vals;
4128   PetscErrorCode ierr;
4129 
4130   PetscFunctionBegin;
4131   /* get indices in local ordering exploiting local to global map */
4132   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4133   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4134   for (i=0;i<lsize;i++) vals[i] = 1.0;
4135   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4136   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4137   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4138   if (idxs) { /* multilevel guard */
4139     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4140   }
4141   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4142   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4143   ierr = PetscFree(vals);CHKERRQ(ierr);
4144   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4145   /* now compute set in local ordering */
4146   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4147   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4148   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4149   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4150   for (i=0,lsize=0;i<n;i++) {
4151     if (PetscRealPart(vals[i]) > 0.5) {
4152       lsize++;
4153     }
4154   }
4155   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4156   for (i=0,lsize=0;i<n;i++) {
4157     if (PetscRealPart(vals[i]) > 0.5) {
4158       idxs[lsize++] = i;
4159     }
4160   }
4161   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4162   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4163   *localis = localis_t;
4164   PetscFunctionReturn(0);
4165 }
4166 
4167 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4168 #undef __FUNCT__
4169 #define __FUNCT__ "PCBDDCMatMult_Private"
4170 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4171 {
4172   PCBDDCChange_ctx change_ctx;
4173   PetscErrorCode   ierr;
4174 
4175   PetscFunctionBegin;
4176   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4177   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4178   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4179   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4180   PetscFunctionReturn(0);
4181 }
4182 
4183 #undef __FUNCT__
4184 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4185 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4186 {
4187   PCBDDCChange_ctx change_ctx;
4188   PetscErrorCode   ierr;
4189 
4190   PetscFunctionBegin;
4191   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4192   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4193   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4194   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4195   PetscFunctionReturn(0);
4196 }
4197