xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 89c96988850502c5a16c253be42bd9b3da323bd4)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscInt       i;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
54   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
55   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   for (i=0;i<pcbddc->n_ISForDofs;i++) {
59     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
60   }
61   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
62   pcbddc->n_ISForDofs = 0;
63   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
64   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "PCBDDCResetTopography"
70 PetscErrorCode PCBDDCResetTopography(PC pc)
71 {
72   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
77   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
78   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "PCBDDCResetSolvers"
84 PetscErrorCode PCBDDCResetSolvers(PC pc)
85 {
86   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
87   PetscErrorCode ierr;
88 
89   PetscFunctionBegin;
90   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
91   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118 {
119   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
120   PC_IS          *pcis = (PC_IS*)pc->data;
121   VecType        impVecType;
122   PetscInt       n_constraints,n_R,old_size;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   if (!pcbddc->ConstraintMatrix) {
127     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128   }
129   /* get sizes */
130   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
131   n_R = pcis->n-pcbddc->n_actual_vertices;
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   /* local work vectors (try to avoid unneeded work)*/
134   /* R nodes */
135   old_size = -1;
136   if (pcbddc->vec1_R) {
137     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
138   }
139   if (n_R != old_size) {
140     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
146   }
147   /* local primal dofs */
148   old_size = -1;
149   if (pcbddc->vec1_P) {
150     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
151   }
152   if (pcbddc->local_primal_size != old_size) {
153     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
156     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
157   }
158   /* local explicit constraints */
159   old_size = -1;
160   if (pcbddc->vec1_C) {
161     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
162   }
163   if (n_constraints && n_constraints != old_size) {
164     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
167     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCSetUpCorrection"
174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175 {
176   PetscErrorCode         ierr;
177   /* pointers to pcis and pcbddc */
178   PC_IS*                 pcis = (PC_IS*)pc->data;
179   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180   /* submatrices of local problem */
181   Mat                    A_RV,A_VR,A_VV;
182   /* working matrices */
183   Mat                    M1,M2,M3,C_CR;
184   /* working vectors */
185   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186   /* additional working stuff */
187   IS                     is_aux;
188   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
189   const PetscScalar      *array,*row_cmat_values;
190   const PetscInt         *row_cmat_indices,*idx_R_local;
191   PetscInt               *idx_V_B,*auxindices;
192   PetscInt               n_vertices,n_constraints,size_of_constraint;
193   PetscInt               i,j,n_R,n_D,n_B;
194   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
195   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
196   MatType                impMatType;
197   /* some shortcuts to scalars */
198   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
199   /* for debugging purposes */
200   PetscReal              *coarsefunctions_errors,*constraints_errors;
201 
202   PetscFunctionBegin;
203   /* get number of vertices (corners plus constraints with change of basis)
204      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
205   n_vertices = pcbddc->n_actual_vertices;
206   n_constraints = pcbddc->local_primal_size-n_vertices;
207   /* Set Non-overlapping dimensions */
208   n_B = pcis->n_B; n_D = pcis->n - n_B;
209   n_R = pcis->n-n_vertices;
210 
211   /* Set types for local objects needed by BDDC precondtioner */
212   impMatType = MATSEQDENSE;
213 
214   /* Allocating some extra storage just to be safe */
215   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
216   for (i=0;i<pcis->n;i++) auxindices[i]=i;
217 
218   /* vertices in boundary numbering */
219   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
220   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
221   if (i != n_vertices) {
222     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
223   }
224 
225   /* Precompute stuffs needed for preprocessing and application of BDDC*/
226   if (n_constraints) {
227     /* see if we can save some allocations */
228     if (pcbddc->local_auxmat2) {
229       PetscInt on_R,on_constraints;
230       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
231       if (on_R != n_R || on_constraints != n_constraints) {
232         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
233         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
234       }
235     }
236     /* work vectors */
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
239     /* auxiliary matrices */
240     if (!pcbddc->local_auxmat2) {
241       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
242       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
243       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
244       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
245     }
246 
247     /* Extract constraints on R nodes: C_{CR}  */
248     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
249     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
250     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
251 
252     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
253     for (i=0;i<n_constraints;i++) {
254       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
255       /* Get row of constraint matrix in R numbering */
256       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
257       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
258       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
260       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
261       /* Solve for row of constraint matrix in R numbering */
262       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
263       /* Set values in local_auxmat2 */
264       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
265       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
266       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267     }
268     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
271 
272     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
273     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
274     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
275     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
276     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
277     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(M1);CHKERRQ(ierr);
279     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
280     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
281     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
282     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
283     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
284     ierr = MatDestroy(&M2);CHKERRQ(ierr);
285     ierr = MatDestroy(&M3);CHKERRQ(ierr);
286     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
287     if (!pcbddc->local_auxmat1) {
288       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
289     } else {
290       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     }
292   }
293 
294   /* Get submatrices from subdomain matrix */
295   if (n_vertices) {
296     PetscInt ibs,mbs;
297     PetscBool issbaij;
298     Mat newmat;
299 
300     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
301     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
302     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
303     if (ibs != mbs) { /* need to convert to SEQAIJ */
304       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
308       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
309     } else {
310       /* this is safe */
311       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
313       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
314         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
315         /* which of the two approaches is faster? */
316         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
317         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
318         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
320         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
321       } else {
322         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
323         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324       }
325     }
326     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
327     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
328     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
329   }
330 
331   /* Matrix of coarse basis functions (local) */
332   if (pcbddc->coarse_phi_B) {
333     PetscInt on_B,on_primal;
334     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
335     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
336       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
337       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
338     }
339   }
340   if (pcbddc->coarse_phi_D) {
341     PetscInt on_D,on_primal;
342     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
343     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
344       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
345       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
346     }
347   }
348   if (!pcbddc->coarse_phi_B) {
349     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
350     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
351     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
352     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
353   }
354   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
355     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
356     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
357     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
358     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
359   }
360 
361   if (pcbddc->dbg_flag) {
362     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
363     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
364     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
365   }
366   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
367   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
368 
369   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
370 
371   /* vertices */
372   for (i=0;i<n_vertices;i++) {
373     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
374     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
375     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
376     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
377     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
378     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
379     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
380     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
383     if (n_constraints) {
384       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
385       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
387     }
388     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
389     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
390 
391     /* Set values in coarse basis function and subdomain part of coarse_mat */
392     /* coarse basis functions */
393     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
394     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
397     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
398     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
400     if (pcbddc->switch_static || pcbddc->dbg_flag) {
401       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
404       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
405       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406     }
407     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
408     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
409     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
410     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     if (n_constraints) {
412       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
413       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
414       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     }
416 
417     /* check */
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on local nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
425       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
426       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
427       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
428       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
429       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       if (n_constraints) {
433         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       }
437       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
440       /* check saddle point solution */
441       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
444       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
445       /* shift by the identity matrix */
446       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
447       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
450     }
451   }
452 
453   /* constraints */
454   for (i=0;i<n_constraints;i++) {
455     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
456     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
457     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
458     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
459     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
460     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
461     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
462     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
463     if (n_vertices) {
464       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
465     }
466     /* Set values in coarse basis function and subdomain part of coarse_mat */
467     /* coarse basis functions */
468     j = i+n_vertices; /* don't touch this! */
469     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
470     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
473     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
474     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
475     if (pcbddc->switch_static || pcbddc->dbg_flag) {
476       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
479       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
481     }
482     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
483     if (n_vertices) {
484       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
485       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
487     }
488     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
489     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
490     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
491 
492     if (pcbddc->dbg_flag) {
493       /* assemble subdomain vector on nodes */
494       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
495       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
496       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
497       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
498       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
499       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
500       /* assemble subdomain vector of lagrange multipliers */
501       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
502       if (n_vertices) {
503         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
504         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
505         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
506       }
507       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
508       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
509       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
510       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
513       /* check saddle point solution */
514       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
517       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
518       /* shift by the identity matrix */
519       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
520       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
523     }
524   }
525   /* call assembling routines for local coarse basis */
526   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   if (pcbddc->switch_static || pcbddc->dbg_flag) {
529     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   }
532 
533   /* compute other basis functions for non-symmetric problems */
534   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
535   if (!setsym || (setsym && !issym)) {
536     if (!pcbddc->coarse_psi_B) {
537       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
538       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
539       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
540       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
541     }
542     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
543       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
544       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
545       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
546       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
547     }
548     for (i=0;i<pcbddc->local_primal_size;i++) {
549       if (n_constraints) {
550         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
551         for (j=0;j<n_constraints;j++) {
552           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
553         }
554         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
555         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
556       }
557       if (i<n_vertices) {
558         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
559         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
560         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
561         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
562         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
563         if (n_constraints) {
564           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
565         }
566       } else {
567         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
568       }
569       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
570       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
571       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
574       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
575       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
576       if (i<n_vertices) {
577         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
578       }
579       if (pcbddc->switch_static || pcbddc->dbg_flag) {
580         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
583         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
584         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
585       }
586 
587       if (pcbddc->dbg_flag) {
588         /* assemble subdomain vector on nodes */
589         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
590         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
591         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
592         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
593         if (i<n_vertices) {
594           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
595         }
596         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
597         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
598         /* assemble subdomain vector of lagrange multipliers */
599         for (j=0;j<pcbddc->local_primal_size;j++) {
600           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
601         }
602         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
603         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
604         /* check saddle point solution */
605         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
608         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
609         /* shift by the identity matrix */
610         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
611         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
614       }
615     }
616     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     if (pcbddc->switch_static || pcbddc->dbg_flag) {
619       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     }
622   }
623   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
624   /* Checking coarse_sub_mat and coarse basis functios */
625   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
626   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
627   if (pcbddc->dbg_flag) {
628     Mat         coarse_sub_mat;
629     Mat         AUXMAT,TM1,TM2,TM3,TM4;
630     Mat         coarse_phi_D,coarse_phi_B;
631     Mat         coarse_psi_D,coarse_psi_B;
632     Mat         A_II,A_BB,A_IB,A_BI;
633     MatType     checkmattype=MATSEQAIJ;
634     PetscReal   real_value;
635 
636     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
637     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
638     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
639     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
640     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
641     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
642     if (pcbddc->coarse_psi_B) {
643       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
644       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
645     }
646     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
647 
648     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
649     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
650     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
651     if (pcbddc->coarse_psi_B) {
652       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
653       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
654       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
655       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
656       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
657       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
658       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
659       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
660       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
661       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
662       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
663       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
664     } else {
665       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
666       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
667       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
668       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
669       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
670       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
671       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
672       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
673     }
674     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
675     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
676     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
677     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
678     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
679     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
680     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
681     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
684     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
685     for (i=0;i<pcbddc->local_primal_size;i++) {
686       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
687     }
688     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
689     for (i=0;i<pcbddc->local_primal_size;i++) {
690       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
691     }
692     if (pcbddc->coarse_psi_B) {
693       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
694       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
695         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
696       }
697       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
698       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
699         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
700       }
701     }
702     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
703     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
706     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
707     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
710     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
711     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
712     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
713     if (pcbddc->coarse_psi_B) {
714       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
715       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
716     }
717     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
718     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
719     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
720     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
721   }
722   /* free memory */
723   if (n_vertices) {
724     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
725     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
727     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
729   }
730   if (n_constraints) {
731     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
732     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
733     ierr = MatDestroy(&M1);CHKERRQ(ierr);
734     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
735   }
736   ierr = PetscFree(auxindices);CHKERRQ(ierr);
737   /* get back data */
738   *coarse_submat_vals_n = coarse_submat_vals;
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
744 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
745 {
746   PC_IS*            pcis = (PC_IS*)(pc->data);
747   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
748   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
749   PetscBool         issbaij,isbaij;
750   /* manage repeated solves */
751   MatReuse          reuse;
752   MatStructure      matstruct;
753   PetscErrorCode    ierr;
754 
755   PetscFunctionBegin;
756   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
757     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
758   }
759   /* get mat flags */
760   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
761   reuse = MAT_INITIAL_MATRIX;
762   if (pc->setupcalled) {
763     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
764     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
765     if (matstruct == SAME_NONZERO_PATTERN) {
766       reuse = MAT_REUSE_MATRIX;
767     } else {
768       reuse = MAT_INITIAL_MATRIX;
769     }
770   }
771   if (reuse == MAT_INITIAL_MATRIX) {
772     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
773     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
774     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
775     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
776     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
777   }
778 
779   /* transform local matrices if needed */
780   if (pcbddc->use_change_of_basis) {
781     Mat         change_mat_all;
782     PetscScalar *row_cmat_values;
783     PetscInt    *row_cmat_indices;
784     PetscInt    *nnz,*is_indices,*temp_indices;
785     PetscInt    i,j,k,n_D,n_B;
786 
787     /* Get Non-overlapping dimensions */
788     n_B = pcis->n_B;
789     n_D = pcis->n-n_B;
790 
791     /* compute nonzero structure of change of basis on all local nodes */
792     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
793     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
794     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
795     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
796     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
797     k=1;
798     for (i=0;i<n_B;i++) {
799       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
800       nnz[is_indices[i]]=j;
801       if (k < j) k = j;
802       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
803     }
804     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
805     /* assemble change of basis matrix on the whole set of local dofs */
806     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
807     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
808     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
809     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
810     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
811     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
812     for (i=0;i<n_D;i++) {
813       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
814     }
815     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
817     for (i=0;i<n_B;i++) {
818       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
819       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
820       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
821       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
822     }
823     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
825     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
826     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
827     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
828     if (!issbaij && !isbaij) {
829       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
830     } else {
831       Mat work_mat;
832       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
833       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
834       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
835     }
836     /*
837     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
838     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
839     */
840     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
841     ierr = PetscFree(nnz);CHKERRQ(ierr);
842     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
843   } else {
844     /* without change of basis, the local matrix is unchanged */
845     if (!pcbddc->local_mat) {
846       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
847       pcbddc->local_mat = matis->A;
848     }
849   }
850 
851   /* get submatrices */
852   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
853   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
854   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
855   if (!issbaij) {
856     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
857     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
858   } else {
859     Mat newmat;
860     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
861     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
862     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
863     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
864   }
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
870 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
871 {
872   PC_IS*         pcis = (PC_IS*)(pc->data);
873   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
874   IS             is_aux1,is_aux2;
875   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
876   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
877   PetscInt       vbs,bs;
878   PetscBT        bitmask;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882   /* No need to setup local scatters if primal space is unchanged */
883   if (!pcbddc->new_primal_space_local) {
884     PetscFunctionReturn(0);
885   }
886   /* destroy old objects */
887   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
888   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
889   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
890   /* Set Non-overlapping dimensions */
891   n_B = pcis->n_B; n_D = pcis->n - n_B;
892   n_vertices = pcbddc->n_actual_vertices;
893   /* create auxiliary bitmask */
894   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
895   for (i=0;i<n_vertices;i++) {
896     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
897   }
898 
899   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
900   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
901   for (i=0, n_R=0; i<pcis->n; i++) {
902     if (!PetscBTLookup(bitmask,i)) {
903       idx_R_local[n_R] = i;
904       n_R++;
905     }
906   }
907 
908   /* Block code */
909   vbs = 1;
910   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
911   if (bs>1 && !(n_vertices%bs)) {
912     PetscBool is_blocked = PETSC_TRUE;
913     PetscInt  *vary;
914     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
915     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
916     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
917     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
918     for (i=0; i<n_vertices; i++) {
919       if (vary[i]!=0 && vary[i]!=bs) {
920         is_blocked = PETSC_FALSE;
921         break;
922       }
923     }
924     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
925       vbs = bs;
926       for (i=0;i<n_R/vbs;i++) {
927         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
928       }
929     }
930     ierr = PetscFree(vary);CHKERRQ(ierr);
931   }
932   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
933   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
934 
935   /* print some info if requested */
936   if (pcbddc->dbg_flag) {
937     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
938     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
939     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
940     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
941     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
942     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
943     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
944     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
945   }
946 
947   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
948   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
949   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
950   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
951   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
952   for (i=0; i<n_D; i++) {
953     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
954   }
955   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
956   for (i=0, j=0; i<n_R; i++) {
957     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
958       aux_array1[j++] = i;
959     }
960   }
961   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
962   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
963   for (i=0, j=0; i<n_B; i++) {
964     if (!PetscBTLookup(bitmask,is_indices[i])) {
965       aux_array2[j++] = i;
966     }
967   }
968   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
969   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
970   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
971   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
972   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
973 
974   if (pcbddc->switch_static || pcbddc->dbg_flag) {
975     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
976     for (i=0, j=0; i<n_R; i++) {
977       if (PetscBTLookup(bitmask,idx_R_local[i])) {
978         aux_array1[j++] = i;
979       }
980     }
981     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
982     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
983     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
984   }
985   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
986   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
993 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
994 {
995   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
996   PC_IS          *pcis = (PC_IS*)pc->data;
997   PC             pc_temp;
998   Mat            A_RR;
999   MatStructure   matstruct;
1000   MatReuse       reuse;
1001   PetscScalar    m_one = -1.0;
1002   PetscReal      value;
1003   PetscInt       n_D,n_R,ibs,mbs;
1004   PetscBool      use_exact,use_exact_reduced,issbaij;
1005   PetscErrorCode ierr;
1006   /* prefixes stuff */
1007   char           dir_prefix[256],neu_prefix[256],str_level[3];
1008   size_t         len;
1009 
1010   PetscFunctionBegin;
1011   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1012 
1013   /* compute prefixes */
1014   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1015   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1016   if (!pcbddc->current_level) {
1017     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1018     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1019     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1020     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1021   } else {
1022     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1023     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1024     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1025     len -= 15; /* remove "pc_bddc_coarse_" */
1026     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1027     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1028     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1029     *(dir_prefix+len)='\0';
1030     *(neu_prefix+len)='\0';
1031     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1032     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1033     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1034     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1035   }
1036 
1037   /* DIRICHLET PROBLEM */
1038   /* Matrix for Dirichlet problem is pcis->A_II */
1039   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1040   if (!pcbddc->ksp_D) { /* create object if not yet build */
1041     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1042     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1043     /* default */
1044     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1045     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1046     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1047     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1048     if (issbaij) {
1049       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1050     } else {
1051       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1052     }
1053     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1054   }
1055   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1056   /* Allow user's customization */
1057   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1058   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1059   if (!n_D) {
1060     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1061     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1062   }
1063   /* Set Up KSP for Dirichlet problem of BDDC */
1064   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1065   /* set ksp_D into pcis data */
1066   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1067   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1068   pcis->ksp_D = pcbddc->ksp_D;
1069 
1070   /* NEUMANN PROBLEM */
1071   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1072   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1073   if (pcbddc->ksp_R) { /* already created ksp */
1074     PetscInt nn_R;
1075     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1076     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1077     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1078     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1079       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1080       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1081       reuse = MAT_INITIAL_MATRIX;
1082     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1083       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1084         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1085         reuse = MAT_INITIAL_MATRIX;
1086       } else { /* safe to reuse the matrix */
1087         reuse = MAT_REUSE_MATRIX;
1088       }
1089     }
1090     /* last check */
1091     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1092       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1093       reuse = MAT_INITIAL_MATRIX;
1094     }
1095   } else { /* first time, so we need to create the matrix */
1096     reuse = MAT_INITIAL_MATRIX;
1097   }
1098   /* extract A_RR */
1099   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1100   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1101   if (ibs != mbs) {
1102     Mat newmat;
1103     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1104     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1105     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1106   } else {
1107     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1108   }
1109   if (!pcbddc->ksp_R) { /* create object if not present */
1110     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1111     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1112     /* default */
1113     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1114     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1115     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1116     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1117     if (issbaij) {
1118       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1119     } else {
1120       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1121     }
1122     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1123   }
1124   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1125   /* Allow user's customization */
1126   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1127   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1128   if (!n_R) {
1129     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1130     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1131   }
1132   /* Set Up KSP for Neumann problem of BDDC */
1133   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1134 
1135   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1136   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1137     /* Dirichlet */
1138     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1139     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1140     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1141     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1142     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1143     /* need to be adapted? */
1144     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1145     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1146     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1147     /* print info */
1148     if (pcbddc->dbg_flag) {
1149       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1150       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1151       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1152       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1153       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1154       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1155     }
1156     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1157       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1158     }
1159 
1160     /* Neumann */
1161     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1162     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1163     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1164     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1165     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1166     /* need to be adapted? */
1167     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1168     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1169     /* print info */
1170     if (pcbddc->dbg_flag) {
1171       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1172       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1173     }
1174     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1175       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1176     }
1177   }
1178   /* free Neumann problem's matrix */
1179   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1185 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1186 {
1187   PetscErrorCode ierr;
1188   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1189 
1190   PetscFunctionBegin;
1191   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1192   if (pcbddc->local_auxmat1) {
1193     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1194     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1201 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1202 {
1203   PetscErrorCode ierr;
1204   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1205   PC_IS*            pcis = (PC_IS*)  (pc->data);
1206   const PetscScalar zero = 0.0;
1207 
1208   PetscFunctionBegin;
1209   /* Application of PHI^T (or PSI^T)  */
1210   if (pcbddc->coarse_psi_B) {
1211     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1212     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1213   } else {
1214     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1215     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1216   }
1217   /* Scatter data of coarse_rhs */
1218   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1219   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220 
1221   /* Local solution on R nodes */
1222   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1223   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   if (pcbddc->switch_static) {
1226     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1227     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1228   }
1229   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1230   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1231   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233   if (pcbddc->switch_static) {
1234     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236   }
1237 
1238   /* Coarse solution */
1239   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1241     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1242   }
1243   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1244   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1245 
1246   /* Sum contributions from two levels */
1247   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1248   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1254 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1255 {
1256   PetscErrorCode ierr;
1257   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1258 
1259   PetscFunctionBegin;
1260   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1266 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1267 {
1268   PetscErrorCode ierr;
1269   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1270 
1271   PetscFunctionBegin;
1272   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 /* uncomment for testing purposes */
1277 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1278 #undef __FUNCT__
1279 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1280 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1281 {
1282   PetscErrorCode    ierr;
1283   PC_IS*            pcis = (PC_IS*)(pc->data);
1284   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1285   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1286   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1287   MatType           impMatType=MATSEQAIJ;
1288   /* one and zero */
1289   PetscScalar       one=1.0,zero=0.0;
1290   /* space to store constraints and their local indices */
1291   PetscScalar       *temp_quadrature_constraint;
1292   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1293   /* iterators */
1294   PetscInt          i,j,k,total_counts,temp_start_ptr;
1295   /* stuff to store connected components stored in pcbddc->mat_graph */
1296   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1297   PetscInt          n_ISForFaces,n_ISForEdges;
1298   /* near null space stuff */
1299   MatNullSpace      nearnullsp;
1300   const Vec         *nearnullvecs;
1301   Vec               *localnearnullsp;
1302   PetscBool         nnsp_has_cnst;
1303   PetscInt          nnsp_size;
1304   PetscScalar       *array;
1305   /* BLAS integers */
1306   PetscBLASInt      lwork,lierr;
1307   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1308   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1309   /* LAPACK working arrays for SVD or POD */
1310   PetscBool         skip_lapack;
1311   PetscScalar       *work;
1312   PetscReal         *singular_vals;
1313 #if defined(PETSC_USE_COMPLEX)
1314   PetscReal         *rwork;
1315 #endif
1316 #if defined(PETSC_MISSING_LAPACK_GESVD)
1317   PetscBLASInt      Blas_one_2=1;
1318   PetscScalar       *temp_basis,*correlation_mat;
1319 #else
1320   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1321   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1322 #endif
1323   /* reuse */
1324   PetscInt          olocal_primal_size;
1325   PetscInt          *oprimal_indices_local_idxs;
1326   /* change of basis */
1327   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1328   PetscBool         boolforchange,qr_needed;
1329   PetscBT           touched,change_basis,qr_needed_idx;
1330   /* auxiliary stuff */
1331   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1332   /* some quantities */
1333   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1334   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1335 
1336 
1337   PetscFunctionBegin;
1338   /* Destroy Mat objects computed previously */
1339   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1340   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1341   /* Get index sets for faces, edges and vertices from graph */
1342   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1343     pcbddc->use_vertices = PETSC_TRUE;
1344   }
1345   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1346   /* HACK: provide functions to set change of basis */
1347   if (!ISForVertices && pcbddc->NullSpace) {
1348     pcbddc->use_change_of_basis = PETSC_TRUE;
1349     pcbddc->use_change_on_faces = PETSC_FALSE;
1350   }
1351   /* print some info */
1352   if (pcbddc->dbg_flag) {
1353     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1354     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1355     i = 0;
1356     if (ISForVertices) {
1357       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1358     }
1359     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1360     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1361     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1362     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1363   }
1364   /* check if near null space is attached to global mat */
1365   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1366   if (nearnullsp) {
1367     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1368     /* remove any stored info */
1369     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1370     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1371     /* store information for BDDC solver reuse */
1372     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1373     pcbddc->onearnullspace = nearnullsp;
1374     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1375     for (i=0;i<nnsp_size;i++) {
1376       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1377     }
1378   } else { /* if near null space is not provided BDDC uses constants by default */
1379     nnsp_size = 0;
1380     nnsp_has_cnst = PETSC_TRUE;
1381   }
1382   /* get max number of constraints on a single cc */
1383   max_constraints = nnsp_size;
1384   if (nnsp_has_cnst) max_constraints++;
1385 
1386   /*
1387        Evaluate maximum storage size needed by the procedure
1388        - temp_indices will contain start index of each constraint stored as follows
1389        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1390        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1391        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1392                                                                                                                                                          */
1393   total_counts = n_ISForFaces+n_ISForEdges;
1394   total_counts *= max_constraints;
1395   n_vertices = 0;
1396   if (ISForVertices) {
1397     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1398   }
1399   total_counts += n_vertices;
1400   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1401   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1402   total_counts = 0;
1403   max_size_of_constraint = 0;
1404   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1405     if (i<n_ISForEdges) {
1406       used_IS = &ISForEdges[i];
1407     } else {
1408       used_IS = &ISForFaces[i-n_ISForEdges];
1409     }
1410     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1411     total_counts += j;
1412     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1413   }
1414   total_counts *= max_constraints;
1415   total_counts += n_vertices;
1416   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1417   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1418   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1419   /* get local part of global near null space vectors */
1420   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1421   for (k=0;k<nnsp_size;k++) {
1422     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1423     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1425   }
1426 
1427   /* whether or not to skip lapack calls */
1428   skip_lapack = PETSC_TRUE;
1429   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1430 
1431   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1432   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1433     PetscScalar temp_work;
1434 #if defined(PETSC_MISSING_LAPACK_GESVD)
1435     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1436     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1437     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1438     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1439 #if defined(PETSC_USE_COMPLEX)
1440     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1441 #endif
1442     /* now we evaluate the optimal workspace using query with lwork=-1 */
1443     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1444     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1445     lwork = -1;
1446     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1447 #if !defined(PETSC_USE_COMPLEX)
1448     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1449 #else
1450     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1451 #endif
1452     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1453     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1454 #else /* on missing GESVD */
1455     /* SVD */
1456     PetscInt max_n,min_n;
1457     max_n = max_size_of_constraint;
1458     min_n = max_constraints;
1459     if (max_size_of_constraint < max_constraints) {
1460       min_n = max_size_of_constraint;
1461       max_n = max_constraints;
1462     }
1463     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1464 #if defined(PETSC_USE_COMPLEX)
1465     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1466 #endif
1467     /* now we evaluate the optimal workspace using query with lwork=-1 */
1468     lwork = -1;
1469     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1470     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1471     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1472     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1473 #if !defined(PETSC_USE_COMPLEX)
1474     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1475 #else
1476     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1477 #endif
1478     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1479     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1480 #endif /* on missing GESVD */
1481     /* Allocate optimal workspace */
1482     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1483     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1484   }
1485   /* Now we can loop on constraining sets */
1486   total_counts = 0;
1487   temp_indices[0] = 0;
1488   /* vertices */
1489   if (ISForVertices) {
1490     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1491     if (nnsp_has_cnst) { /* consider all vertices */
1492       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1493       for (i=0;i<n_vertices;i++) {
1494         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1495         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1496         total_counts++;
1497       }
1498     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1499       PetscBool used_vertex;
1500       for (i=0;i<n_vertices;i++) {
1501         used_vertex = PETSC_FALSE;
1502         k = 0;
1503         while (!used_vertex && k<nnsp_size) {
1504           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1505           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1506             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1507             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1508             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1509             total_counts++;
1510             used_vertex = PETSC_TRUE;
1511           }
1512           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1513           k++;
1514         }
1515       }
1516     }
1517     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1518     n_vertices = total_counts;
1519   }
1520 
1521   /* edges and faces */
1522   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1523     if (i<n_ISForEdges) {
1524       used_IS = &ISForEdges[i];
1525       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1526     } else {
1527       used_IS = &ISForFaces[i-n_ISForEdges];
1528       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1529     }
1530     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1531     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1532     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1533     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1534     /* change of basis should not be performed on local periodic nodes */
1535     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1536     if (nnsp_has_cnst) {
1537       PetscScalar quad_value;
1538       temp_constraints++;
1539       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1540       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1541       for (j=0;j<size_of_constraint;j++) {
1542         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1543       }
1544       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1545       total_counts++;
1546     }
1547     for (k=0;k<nnsp_size;k++) {
1548       PetscReal real_value;
1549       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1550       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1551       for (j=0;j<size_of_constraint;j++) {
1552         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1553       }
1554       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1555       /* check if array is null on the connected component */
1556       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1557       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1558       if (real_value > 0.0) { /* keep indices and values */
1559         temp_constraints++;
1560         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1561         total_counts++;
1562       }
1563     }
1564     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1565     valid_constraints = temp_constraints;
1566     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1567     if (!pcbddc->use_nnsp_true && temp_constraints) {
1568       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1569 
1570 #if defined(PETSC_MISSING_LAPACK_GESVD)
1571       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1572          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1573          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1574             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1575             from that computed using LAPACKgesvd
1576          -> This is due to a different computation of eigenvectors in LAPACKheev
1577          -> The quality of the POD-computed basis will be the same */
1578       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1579       /* Store upper triangular part of correlation matrix */
1580       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1581       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1582       for (j=0;j<temp_constraints;j++) {
1583         for (k=0;k<j+1;k++) {
1584           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1585         }
1586       }
1587       /* compute eigenvalues and eigenvectors of correlation matrix */
1588       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1589       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1590 #if !defined(PETSC_USE_COMPLEX)
1591       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1592 #else
1593       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1594 #endif
1595       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1596       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1597       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1598       j=0;
1599       while (j < temp_constraints && singular_vals[j] < tol) j++;
1600       total_counts=total_counts-j;
1601       valid_constraints = temp_constraints-j;
1602       /* scale and copy POD basis into used quadrature memory */
1603       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1604       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1605       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1606       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1607       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1608       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1609       if (j<temp_constraints) {
1610         PetscInt ii;
1611         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1612         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1613         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1614         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1615         for (k=0;k<temp_constraints-j;k++) {
1616           for (ii=0;ii<size_of_constraint;ii++) {
1617             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1618           }
1619         }
1620       }
1621 #else  /* on missing GESVD */
1622       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1623       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1624       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1625       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1626 #if !defined(PETSC_USE_COMPLEX)
1627       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1628 #else
1629       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1630 #endif
1631       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1632       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1633       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1634       k = temp_constraints;
1635       if (k > size_of_constraint) k = size_of_constraint;
1636       j = 0;
1637       while (j < k && singular_vals[k-j-1] < tol) j++;
1638       total_counts = total_counts-temp_constraints+k-j;
1639       valid_constraints = k-j;
1640 #endif /* on missing GESVD */
1641     }
1642     /* setting change_of_basis flag is safe now */
1643     if (boolforchange) {
1644       for (j=0;j<valid_constraints;j++) {
1645         PetscBTSet(change_basis,total_counts-j-1);
1646       }
1647     }
1648   }
1649   /* free index sets of faces, edges and vertices */
1650   for (i=0;i<n_ISForFaces;i++) {
1651     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1652   }
1653   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1654   for (i=0;i<n_ISForEdges;i++) {
1655     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1656   }
1657   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1658   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1659   /* map temp_indices_to_constraint in boundary numbering */
1660   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1661   if (i != temp_indices[total_counts]) {
1662     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1663   }
1664 
1665   /* free workspace */
1666   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1667     ierr = PetscFree(work);CHKERRQ(ierr);
1668 #if defined(PETSC_USE_COMPLEX)
1669     ierr = PetscFree(rwork);CHKERRQ(ierr);
1670 #endif
1671     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1672 #if defined(PETSC_MISSING_LAPACK_GESVD)
1673     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1674     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1675 #endif
1676   }
1677   for (k=0;k<nnsp_size;k++) {
1678     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1679   }
1680   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1681 
1682   /* set quantities in pcbddc data structure and store previous primal size */
1683   /* n_vertices defines the number of subdomain corners in the primal space */
1684   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1685   olocal_primal_size = pcbddc->local_primal_size;
1686   pcbddc->local_primal_size = total_counts;
1687   pcbddc->n_vertices = n_vertices;
1688   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1689 
1690   /* Create constraint matrix */
1691   /* The constraint matrix is used to compute the l2g map of primal dofs */
1692   /* so we need to set it up properly either with or without change of basis */
1693   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1694   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1695   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1696   /* array to compute a local numbering of constraints : vertices first then constraints */
1697   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1698   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1699   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1700   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1701   /* auxiliary stuff for basis change */
1702   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1703   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1704 
1705   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1706   total_primal_vertices=0;
1707   for (i=0;i<pcbddc->local_primal_size;i++) {
1708     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1709     if (size_of_constraint == 1) {
1710       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1711       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1712       aux_primal_minloc[total_primal_vertices]=0;
1713       total_primal_vertices++;
1714     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1715       PetscInt min_loc,min_index;
1716       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1717       /* find first untouched local node */
1718       k = 0;
1719       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1720       min_index = global_indices[k];
1721       min_loc = k;
1722       /* search the minimum among global nodes already untouched on the cc */
1723       for (k=1;k<size_of_constraint;k++) {
1724         /* there can be more than one constraint on a single connected component */
1725         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1726           min_index = global_indices[k];
1727           min_loc = k;
1728         }
1729       }
1730       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1731       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1732       aux_primal_minloc[total_primal_vertices]=min_loc;
1733       total_primal_vertices++;
1734     }
1735   }
1736   /* determine if a QR strategy is needed for change of basis */
1737   qr_needed = PETSC_FALSE;
1738   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1739   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1740     if (PetscBTLookup(change_basis,i)) {
1741       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1742       j = 0;
1743       for (k=0;k<size_of_constraint;k++) {
1744         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1745           j++;
1746         }
1747       }
1748       /* found more than one primal dof on the cc */
1749       if (j > 1) {
1750         PetscBTSet(qr_needed_idx,i);
1751         qr_needed = PETSC_TRUE;
1752       }
1753     }
1754   }
1755   /* free workspace */
1756   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1757 
1758   /* permute indices in order to have a sorted set of vertices */
1759   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1760 
1761   /* nonzero structure of constraint matrix */
1762   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1763   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1764   j=total_primal_vertices;
1765   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1766     if (!PetscBTLookup(change_basis,i)) {
1767       nnz[j]=temp_indices[i+1]-temp_indices[i];
1768       j++;
1769     }
1770   }
1771   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1772   ierr = PetscFree(nnz);CHKERRQ(ierr);
1773   /* set values in constraint matrix */
1774   for (i=0;i<total_primal_vertices;i++) {
1775     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1776   }
1777   total_counts = total_primal_vertices;
1778   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1779     if (!PetscBTLookup(change_basis,i)) {
1780       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1781       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1782       total_counts++;
1783     }
1784   }
1785   /* assembling */
1786   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1787   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1788   /*
1789   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1790   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1791   */
1792   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1793   if (pcbddc->use_change_of_basis) {
1794     /* dual and primal dofs on a single cc */
1795     PetscInt     dual_dofs,primal_dofs;
1796     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1797     PetscInt     primal_counter;
1798     /* working stuff for GEQRF */
1799     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1800     PetscBLASInt lqr_work;
1801     /* working stuff for UNGQR */
1802     PetscScalar  *gqr_work,lgqr_work_t;
1803     PetscBLASInt lgqr_work;
1804     /* working stuff for TRTRS */
1805     PetscScalar  *trs_rhs;
1806     PetscBLASInt Blas_NRHS;
1807     /* pointers for values insertion into change of basis matrix */
1808     PetscInt     *start_rows,*start_cols;
1809     PetscScalar  *start_vals;
1810     /* working stuff for values insertion */
1811     PetscBT      is_primal;
1812 
1813     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1814     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1815     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1816     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1817     /* work arrays */
1818     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1819     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1820     /* nonzeros per row */
1821     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1822       if (PetscBTLookup(change_basis,i)) {
1823         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1824         if (PetscBTLookup(qr_needed_idx,i)) {
1825           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1826         } else {
1827           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1828           /* get local primal index on the cc */
1829           j = 0;
1830           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1831           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1832         }
1833       }
1834     }
1835     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1836     ierr = PetscFree(nnz);CHKERRQ(ierr);
1837     /* Set initial identity in the matrix */
1838     for (i=0;i<pcis->n_B;i++) {
1839       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1840     }
1841 
1842     if (pcbddc->dbg_flag) {
1843       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1844       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1845     }
1846 
1847 
1848     /* Now we loop on the constraints which need a change of basis */
1849     /*
1850        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1851        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1852 
1853        Basic blocks of change of basis matrix T computed
1854 
1855           - Using the following block transformation if there is only a primal dof on the cc
1856             (in the example, primal dof is the last one of the edge in LOCAL ordering
1857              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1858             | 1        0   ...        0     1 |
1859             | 0        1   ...        0     1 |
1860             |              ...                |
1861             | 0        ...            1     1 |
1862             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1863 
1864           - via QR decomposition of constraints otherwise
1865     */
1866     if (qr_needed) {
1867       /* space to store Q */
1868       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1869       /* first we issue queries for optimal work */
1870       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1871       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1872       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1873       lqr_work = -1;
1874       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1875       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1876       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1877       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1878       lgqr_work = -1;
1879       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1880       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1881       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1882       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1883       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1884       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1885       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1886       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1887       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1888       /* array to store scaling factors for reflectors */
1889       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1890       /* array to store rhs and solution of triangular solver */
1891       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1892       /* allocating workspace for check */
1893       if (pcbddc->dbg_flag) {
1894         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1895       }
1896     }
1897     /* array to store whether a node is primal or not */
1898     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1899     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1900     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1901     if (i != total_primal_vertices) {
1902       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1903     }
1904     for (i=0;i<total_primal_vertices;i++) {
1905       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1906     }
1907     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1908 
1909     /* loop on constraints and see whether or not they need a change of basis and compute it */
1910     /* -> using implicit ordering contained in temp_indices data */
1911     total_counts = pcbddc->n_vertices;
1912     primal_counter = total_counts;
1913     while (total_counts<pcbddc->local_primal_size) {
1914       primal_dofs = 1;
1915       if (PetscBTLookup(change_basis,total_counts)) {
1916         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1917         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1918           primal_dofs++;
1919         }
1920         /* get constraint info */
1921         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1922         dual_dofs = size_of_constraint-primal_dofs;
1923 
1924         if (pcbddc->dbg_flag) {
1925           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1926         }
1927 
1928         if (primal_dofs > 1) { /* QR */
1929 
1930           /* copy quadrature constraints for change of basis check */
1931           if (pcbddc->dbg_flag) {
1932             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1933           }
1934           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1935           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1936 
1937           /* compute QR decomposition of constraints */
1938           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1939           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1940           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1941           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1942           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1943           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1944           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1945 
1946           /* explictly compute R^-T */
1947           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1948           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1949           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1950           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1951           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1952           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1953           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1954           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1955           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1956           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1957 
1958           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1959           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1960           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1961           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1962           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1963           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1964           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1965           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1966           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1967 
1968           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1969              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1970              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1971           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1972           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1973           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1974           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1975           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1976           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1977           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1978           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1979           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1980           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1981 
1982           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1983           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1984           /* insert cols for primal dofs */
1985           for (j=0;j<primal_dofs;j++) {
1986             start_vals = &qr_basis[j*size_of_constraint];
1987             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1988             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1989           }
1990           /* insert cols for dual dofs */
1991           for (j=0,k=0;j<dual_dofs;k++) {
1992             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1993               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1994               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1995               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1996               j++;
1997             }
1998           }
1999 
2000           /* check change of basis */
2001           if (pcbddc->dbg_flag) {
2002             PetscInt   ii,jj;
2003             PetscBool valid_qr=PETSC_TRUE;
2004             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2005             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2006             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2007             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2008             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2009             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2010             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2011             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2012             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2013             for (jj=0;jj<size_of_constraint;jj++) {
2014               for (ii=0;ii<primal_dofs;ii++) {
2015                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2016                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2017               }
2018             }
2019             if (!valid_qr) {
2020               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2021               for (jj=0;jj<size_of_constraint;jj++) {
2022                 for (ii=0;ii<primal_dofs;ii++) {
2023                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2024                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2025                   }
2026                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2027                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2028                   }
2029                 }
2030               }
2031             } else {
2032               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2033             }
2034           }
2035         } else { /* simple transformation block */
2036           PetscInt row,col;
2037           PetscScalar val;
2038           for (j=0;j<size_of_constraint;j++) {
2039             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2040             if (!PetscBTLookup(is_primal,row)) {
2041               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2042               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2043               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2044             } else {
2045               for (k=0;k<size_of_constraint;k++) {
2046                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2047                 if (row != col) {
2048                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2049                 } else {
2050                   val = 1.0;
2051                 }
2052                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2053               }
2054             }
2055           }
2056         }
2057         /* increment primal counter */
2058         primal_counter += primal_dofs;
2059       } else {
2060         if (pcbddc->dbg_flag) {
2061           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2062         }
2063       }
2064       /* increment constraint counter total_counts */
2065       total_counts += primal_dofs;
2066     }
2067 
2068     /* free workspace */
2069     if (qr_needed) {
2070       if (pcbddc->dbg_flag) {
2071         ierr = PetscFree(work);CHKERRQ(ierr);
2072       }
2073       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2074       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2075       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2076       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2077       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2078     }
2079     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2080     /* assembling */
2081     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083     /*
2084     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2085     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2086     */
2087   }
2088 
2089   /* get indices in local ordering for vertices and constraints */
2090   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2091     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2092     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2093   }
2094   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2095   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2096   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2097   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2098   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2099   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2100   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2101   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2102   /* set quantities in PCBDDC data struct */
2103   pcbddc->n_actual_vertices = i;
2104   /* check if a new primal space has been introduced */
2105   pcbddc->new_primal_space_local = PETSC_TRUE;
2106   if (olocal_primal_size == pcbddc->local_primal_size) {
2107     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2108     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2109     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2110   }
2111   /* need to set pcbddc->new_primal_space_local to true when the local primal space is empty, otherwise pcbddc->is_R_local will not be created */
2112   if (!pcbddc->local_primal_size) pcbddc->new_primal_space_local = PETSC_TRUE;
2113   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2114   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2115 
2116   /* flush dbg viewer */
2117   if (pcbddc->dbg_flag) {
2118     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2119   }
2120 
2121   /* free workspace */
2122   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2123   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2124   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2125   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2126   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2127   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2128   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2129   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 #undef __FUNCT__
2134 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2135 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2136 {
2137   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2138   PC_IS       *pcis = (PC_IS*)pc->data;
2139   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2140   PetscInt    bs,ierr,i,vertex_size;
2141   PetscViewer viewer=pcbddc->dbg_viewer;
2142 
2143   PetscFunctionBegin;
2144   /* Reset previously computed graph */
2145   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2146   /* Init local Graph struct */
2147   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2148 
2149   /* Check validity of the csr graph passed in by the user */
2150   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2151     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2152   }
2153 
2154   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2155   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2156     Mat mat_adj;
2157     const PetscInt *xadj,*adjncy;
2158     PetscBool flg_row=PETSC_TRUE;
2159 
2160     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2161     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2162     if (!flg_row) {
2163       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2164     }
2165     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2166     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2167     if (!flg_row) {
2168       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2169     }
2170     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2171   }
2172 
2173   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2174   vertex_size = 1;
2175   if (!pcbddc->user_provided_isfordofs) {
2176     if (!pcbddc->n_ISForDofs) {
2177       IS *custom_ISForDofs;
2178 
2179       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2180       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2181       for (i=0;i<bs;i++) {
2182         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2183       }
2184       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2185       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2186       /* remove my references to IS objects */
2187       for (i=0;i<bs;i++) {
2188         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2189       }
2190       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2191     }
2192   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2193     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2194   }
2195 
2196   /* Setup of Graph */
2197   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2198 
2199   /* Graph's connected components analysis */
2200   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2201 
2202   /* print some info to stdout */
2203   if (pcbddc->dbg_flag) {
2204     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2205   }
2206 
2207   /* mark topography has done */
2208   pcbddc->recompute_topography = PETSC_FALSE;
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2214 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2215 {
2216   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2217   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2218   PetscErrorCode ierr;
2219 
2220   PetscFunctionBegin;
2221   n = 0;
2222   vertices = 0;
2223   if (pcbddc->ConstraintMatrix) {
2224     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2225     for (i=0;i<local_primal_size;i++) {
2226       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2227       if (size_of_constraint == 1) n++;
2228       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2229     }
2230     if (vertices_idx) {
2231       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2232       n = 0;
2233       for (i=0;i<local_primal_size;i++) {
2234         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2235         if (size_of_constraint == 1) {
2236           vertices[n++]=row_cmat_indices[0];
2237         }
2238         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2239       }
2240     }
2241   }
2242   *n_vertices = n;
2243   if (vertices_idx) *vertices_idx = vertices;
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 #undef __FUNCT__
2248 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2249 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2250 {
2251   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2252   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2253   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2254   PetscBT        touched;
2255   PetscErrorCode ierr;
2256 
2257     /* This function assumes that the number of local constraints per connected component
2258        is not greater than the number of nodes defined for the connected component
2259        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2260   PetscFunctionBegin;
2261   n = 0;
2262   constraints_index = 0;
2263   if (pcbddc->ConstraintMatrix) {
2264     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2265     max_size_of_constraint = 0;
2266     for (i=0;i<local_primal_size;i++) {
2267       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2268       if (size_of_constraint > 1) {
2269         n++;
2270       }
2271       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2272       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2273     }
2274     if (constraints_idx) {
2275       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2276       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2277       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2278       n = 0;
2279       for (i=0;i<local_primal_size;i++) {
2280         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2281         if (size_of_constraint > 1) {
2282           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2283           /* find first untouched local node */
2284           j = 0;
2285           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2286           min_index = row_cmat_global_indices[j];
2287           min_loc = j;
2288           /* search the minimum among nodes not yet touched on the connected component
2289              since there can be more than one constraint on a single cc */
2290           for (j=1;j<size_of_constraint;j++) {
2291             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2292               min_index = row_cmat_global_indices[j];
2293               min_loc = j;
2294             }
2295           }
2296           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2297           constraints_index[n++] = row_cmat_indices[min_loc];
2298         }
2299         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2300       }
2301       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2302       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2303     }
2304   }
2305   *n_constraints = n;
2306   if (constraints_idx) *constraints_idx = constraints_index;
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 /* the next two functions has been adapted from pcis.c */
2311 #undef __FUNCT__
2312 #define __FUNCT__ "PCBDDCApplySchur"
2313 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2314 {
2315   PetscErrorCode ierr;
2316   PC_IS          *pcis = (PC_IS*)(pc->data);
2317 
2318   PetscFunctionBegin;
2319   if (!vec2_B) { vec2_B = v; }
2320   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2321   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2322   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2323   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2324   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2330 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2331 {
2332   PetscErrorCode ierr;
2333   PC_IS          *pcis = (PC_IS*)(pc->data);
2334 
2335   PetscFunctionBegin;
2336   if (!vec2_B) { vec2_B = v; }
2337   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2338   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2339   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2340   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2341   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "PCBDDCSubsetNumbering"
2347 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2348 {
2349   Vec            local_vec,global_vec;
2350   IS             seqis,paris;
2351   VecScatter     scatter_ctx;
2352   PetscScalar    *array;
2353   PetscInt       *temp_global_dofs;
2354   PetscScalar    globalsum;
2355   PetscInt       i,j,s;
2356   PetscInt       nlocals,first_index,old_index,max_local;
2357   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2358   PetscMPIInt    *dof_sizes,*dof_displs;
2359   PetscBool      first_found;
2360   PetscErrorCode ierr;
2361 
2362   PetscFunctionBegin;
2363   /* mpi buffers */
2364   MPI_Comm_size(comm,&size_prec_comm);
2365   MPI_Comm_rank(comm,&rank_prec_comm);
2366   j = ( !rank_prec_comm ? size_prec_comm : 0);
2367   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2368   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2369   /* get maximum size of subset */
2370   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2371   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2372   max_local = 0;
2373   if (n_local_dofs) {
2374     max_local = temp_global_dofs[0];
2375     for (i=1;i<n_local_dofs;i++) {
2376       if (max_local < temp_global_dofs[i] ) {
2377         max_local = temp_global_dofs[i];
2378       }
2379     }
2380   }
2381   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2382   max_global++;
2383   max_local = 0;
2384   if (n_local_dofs) {
2385     max_local = local_dofs[0];
2386     for (i=1;i<n_local_dofs;i++) {
2387       if (max_local < local_dofs[i] ) {
2388         max_local = local_dofs[i];
2389       }
2390     }
2391   }
2392   max_local++;
2393   /* allocate workspace */
2394   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2395   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2396   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2397   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2398   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2399   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2400   /* create scatter */
2401   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2402   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2403   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2404   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2405   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2406   /* init array */
2407   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2408   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2409   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2410   if (local_dofs_mult) {
2411     for (i=0;i<n_local_dofs;i++) {
2412       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2413     }
2414   } else {
2415     for (i=0;i<n_local_dofs;i++) {
2416       array[local_dofs[i]]=1.0;
2417     }
2418   }
2419   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2420   /* scatter into global vec and get total number of global dofs */
2421   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2422   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2423   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2424   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2425   /* Fill global_vec with cumulative function for global numbering */
2426   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2427   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2428   nlocals = 0;
2429   first_index = -1;
2430   first_found = PETSC_FALSE;
2431   for (i=0;i<s;i++) {
2432     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2433       first_found = PETSC_TRUE;
2434       first_index = i;
2435     }
2436     nlocals += (PetscInt)PetscRealPart(array[i]);
2437   }
2438   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2439   if (!rank_prec_comm) {
2440     dof_displs[0]=0;
2441     for (i=1;i<size_prec_comm;i++) {
2442       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2443     }
2444   }
2445   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2446   if (first_found) {
2447     array[first_index] += (PetscScalar)nlocals;
2448     old_index = first_index;
2449     for (i=first_index+1;i<s;i++) {
2450       if (PetscRealPart(array[i]) > 0.0) {
2451         array[i] += array[old_index];
2452         old_index = i;
2453       }
2454     }
2455   }
2456   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2457   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2458   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2459   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2460   /* get global ordering of local dofs */
2461   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2462   if (local_dofs_mult) {
2463     for (i=0;i<n_local_dofs;i++) {
2464       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2465     }
2466   } else {
2467     for (i=0;i<n_local_dofs;i++) {
2468       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2469     }
2470   }
2471   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2472   /* free workspace */
2473   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2474   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2475   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2476   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2477   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2478   /* return pointer to global ordering of local dofs */
2479   *global_numbering_subset = temp_global_dofs;
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2485 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2486 {
2487   PetscInt       i,j;
2488   PetscScalar    *alphas;
2489   PetscErrorCode ierr;
2490 
2491   PetscFunctionBegin;
2492   /* this implements stabilized Gram-Schmidt */
2493   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2494   for (i=0;i<n;i++) {
2495     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2496     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2497     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2498   }
2499   ierr = PetscFree(alphas);CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 /* TODO
2504    - now preallocation is done assuming SEQDENSE local matrices
2505 */
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatISGetMPIXAIJ"
2508 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2509 {
2510   Mat                    new_mat;
2511   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2512   /* info on mat */
2513   /* ISLocalToGlobalMapping rmapping,cmapping; */
2514   PetscInt               bs,rows,cols;
2515   PetscInt               lrows,lcols;
2516   PetscInt               local_rows,local_cols;
2517   PetscBool              isdense;
2518   /* values insertion */
2519   PetscScalar            *array;
2520   PetscInt               *local_indices,*global_indices;
2521   /* work */
2522   PetscInt               i,j,index_row;
2523   PetscErrorCode         ierr;
2524 
2525   PetscFunctionBegin;
2526   /* MISSING CHECKS
2527     - rectangular case not covered (it is not allowed by MATIS)
2528   */
2529   /* get info from mat */
2530   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2531   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2532   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2533   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2534 
2535   /* work */
2536   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2537   for (i=0;i<local_rows;i++) local_indices[i]=i;
2538   /* map indices of local mat to global values */
2539   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2540   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2541   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2542 
2543   if (reuse==MAT_INITIAL_MATRIX) {
2544     Vec         vec_dnz,vec_onz;
2545     PetscScalar *my_dnz,*my_onz;
2546     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2547     PetscInt    index_col,owner;
2548     PetscMPIInt nsubdomains;
2549 
2550     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2551     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2552     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2553     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2554     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2555     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2556     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2557 
2558     /*
2559       preallocation
2560     */
2561 
2562     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2563     /*
2564        Some vectors are needed to sum up properly on shared interface dofs.
2565        Preallocation macros cannot do the job.
2566        Note that preallocation is not exact, since it overestimates nonzeros
2567     */
2568     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2569     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2570     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2571     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2572     /* All processes need to compute entire row ownership */
2573     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2574     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2575     for (i=0;i<nsubdomains;i++) {
2576       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2577         row_ownership[j]=i;
2578       }
2579     }
2580 
2581     /*
2582        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2583        then, they will be summed up properly. This way, preallocation is always sufficient
2584     */
2585     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2586     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2587     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2588     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2589     for (i=0;i<local_rows;i++) {
2590       index_row = global_indices[i];
2591       for (j=i;j<local_rows;j++) {
2592         owner = row_ownership[index_row];
2593         index_col = global_indices[j];
2594         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2595           my_dnz[i] += 1.0;
2596         } else { /* offdiag block */
2597           my_onz[i] += 1.0;
2598         }
2599         /* same as before, interchanging rows and cols */
2600         if (i != j) {
2601           owner = row_ownership[index_col];
2602           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2603             my_dnz[j] += 1.0;
2604           } else {
2605             my_onz[j] += 1.0;
2606           }
2607         }
2608       }
2609     }
2610     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2611     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2612     if (local_rows) { /* multilevel guard */
2613       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2614       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2615     }
2616     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2617     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2618     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2619     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2620     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2621     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2622     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2623 
2624     /* set computed preallocation in dnz and onz */
2625     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2626     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2627     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2628     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2629     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2630     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2631     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2632     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2633 
2634     /* Resize preallocation if overestimated */
2635     for (i=0;i<lrows;i++) {
2636       dnz[i] = PetscMin(dnz[i],lcols);
2637       onz[i] = PetscMin(onz[i],cols-lcols);
2638     }
2639     /* set preallocation */
2640     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2641     for (i=0;i<lrows/bs;i++) {
2642       dnz[i] = dnz[i*bs]/bs;
2643       onz[i] = onz[i*bs]/bs;
2644     }
2645     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2646     for (i=0;i<lrows/bs;i++) {
2647       dnz[i] = dnz[i]-i;
2648     }
2649     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2650     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2651     *M = new_mat;
2652   } else {
2653     PetscInt mbs,mrows,mcols;
2654     /* some checks */
2655     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2656     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2657     if (mrows != rows) {
2658       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2659     }
2660     if (mrows != rows) {
2661       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2662     }
2663     if (mbs != bs) {
2664       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2665     }
2666     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2667   }
2668   /* set local to global mappings */
2669   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2670   /* Set values */
2671   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2672   if (isdense) { /* special case for dense local matrices */
2673     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2674     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2675     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2676     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2677     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2678     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2679   } else { /* very basic values insertion for all other matrix types */
2680     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2681     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2682     for (i=0;i<local_rows;i++) {
2683       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2684       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2685       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2686       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2687       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2688       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2689     }
2690   }
2691   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2692   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2693   if (isdense) {
2694     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2695   }
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatISSubassemble_Private"
2701 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2702 {
2703   Mat             subdomain_adj;
2704   IS              new_ranks,ranks_send_to;
2705   MatPartitioning partitioner;
2706   Mat_IS          *matis;
2707   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2708   PetscInt        prank;
2709   PetscMPIInt     size,rank,color;
2710   PetscInt        *xadj,*adjncy,*oldranks;
2711   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2712   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2713   PetscErrorCode  ierr;
2714   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2715   PetscSubcomm    subcomm;
2716 
2717   PetscFunctionBegin;
2718   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2719   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2720   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2721 
2722   /* Get info on mapping */
2723   matis = (Mat_IS*)(mat->data);
2724   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2725   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2726 
2727   /* build local CSR graph of subdomains' connectivity */
2728   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2729   xadj[0] = 0;
2730   xadj[1] = PetscMax(n_neighs-1,0);
2731   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2732   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2733 
2734   if (threshold) {
2735     PetscInt* count,min_threshold;
2736     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2737     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2738     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2739       for (j=0;j<n_shared[i];j++) {
2740         count[shared[i][j]] += 1;
2741       }
2742     }
2743     /* adapt threshold since we dont want to lose significant connections */
2744     min_threshold = n_neighs;
2745     for (i=1;i<n_neighs;i++) {
2746       for (j=0;j<n_shared[i];j++) {
2747         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2748       }
2749     }
2750     threshold = PetscMax(min_threshold+1,threshold);
2751     xadj[1] = 0;
2752     for (i=1;i<n_neighs;i++) {
2753       for (j=0;j<n_shared[i];j++) {
2754         if (count[shared[i][j]] < threshold) {
2755           adjncy[xadj[1]] = neighs[i];
2756           adjncy_wgt[xadj[1]] = n_shared[i];
2757           xadj[1]++;
2758           break;
2759         }
2760       }
2761     }
2762     ierr = PetscFree(count);CHKERRQ(ierr);
2763   } else {
2764     if (xadj[1]) {
2765       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2766       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2767     }
2768   }
2769   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2770   if (use_square) {
2771     for (i=0;i<xadj[1];i++) {
2772       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2773     }
2774   }
2775   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2776 
2777   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2778 
2779   /*
2780     Restrict work on active processes only.
2781   */
2782   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2783   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2784   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2785   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2786   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2787   if (color) {
2788     ierr = PetscFree(xadj);CHKERRQ(ierr);
2789     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2790     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2791   } else {
2792     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2793     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2794     prank = rank;
2795     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2796     for (i=0;i<size;i++) {
2797       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2798     }
2799     for (i=0;i<xadj[1];i++) {
2800       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2801     }
2802     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2803     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2804     n_subdomains = (PetscInt)size;
2805     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2806 
2807     /* Partition */
2808     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2809     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2810     if (use_vwgt) {
2811       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2812       v_wgt[0] = local_size;
2813       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2814     }
2815     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2816     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2817     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2818     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2819     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2820 
2821     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2822     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2823     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2824     /* clean up */
2825     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2826     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2827     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2828     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2829   }
2830   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2831 
2832   /* assemble parallel IS for sends */
2833   i = 1;
2834   if (color) i=0;
2835   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2836   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2837 
2838   /* get back IS */
2839   *is_sends = ranks_send_to;
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2844 
2845 #undef __FUNCT__
2846 #define __FUNCT__ "MatISSubassemble"
2847 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2848 {
2849   Mat                    local_mat,new_mat;
2850   Mat_IS                 *matis;
2851   IS                     is_sends_internal;
2852   PetscInt               rows,cols;
2853   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2854   PetscBool              ismatis,isdense;
2855   ISLocalToGlobalMapping l2gmap;
2856   PetscInt*              l2gmap_indices;
2857   const PetscInt*        is_indices;
2858   MatType                new_local_type;
2859   MatTypePrivate         new_local_type_private;
2860   /* buffers */
2861   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2862   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2863   /* MPI */
2864   MPI_Comm               comm;
2865   PetscMPIInt            n_sends,n_recvs,commsize;
2866   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2867   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2868   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2869   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2870   PetscErrorCode         ierr;
2871 
2872   PetscFunctionBegin;
2873   /* checks */
2874   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2875   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2876   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2877   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2878   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2879   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2880   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2881   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2882   PetscValidLogicalCollectiveInt(mat,bs,0);
2883   /* prepare IS for sending if not provided */
2884   if (!is_sends) {
2885     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2886     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2887   } else {
2888     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2889     is_sends_internal = is_sends;
2890   }
2891 
2892   /* get pointer of MATIS data */
2893   matis = (Mat_IS*)mat->data;
2894 
2895   /* get comm */
2896   comm = PetscObjectComm((PetscObject)mat);
2897 
2898   /* compute number of sends */
2899   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2900   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2901 
2902   /* compute number of receives */
2903   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2904   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2905   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2906   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2907   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2908   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2909   ierr = PetscFree(iflags);CHKERRQ(ierr);
2910 
2911   /* prepare send/receive buffers */
2912   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2913   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2914   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2915   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2916 
2917   /* Get data from local mat */
2918   if (!isdense) {
2919     /* TODO: See below some guidelines on how to prepare the local buffers */
2920     /*
2921        send_buffer_vals should contain the raw values of the local matrix
2922        send_buffer_idxs should contain:
2923        - MatType_PRIVATE type
2924        - PetscInt        size_of_l2gmap
2925        - PetscInt        global_row_indices[size_of_l2gmap]
2926        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2927     */
2928   } else {
2929     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2930     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2931     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2932     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2933     send_buffer_idxs[1] = i;
2934     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2935     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2936     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2937     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2938     for (i=0;i<n_sends;i++) {
2939       ilengths_vals[is_indices[i]] = len*len;
2940       ilengths_idxs[is_indices[i]] = len+2;
2941     }
2942   }
2943   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2944   buf_size_idxs = 0;
2945   buf_size_vals = 0;
2946   for (i=0;i<n_recvs;i++) {
2947     buf_size_idxs += (PetscInt)olengths_idxs[i];
2948     buf_size_vals += (PetscInt)olengths_vals[i];
2949   }
2950   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2951   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2952 
2953   /* get new tags for clean communications */
2954   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2955   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2956 
2957   /* allocate for requests */
2958   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2959   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2960   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2961   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2962 
2963   /* communications */
2964   ptr_idxs = recv_buffer_idxs;
2965   ptr_vals = recv_buffer_vals;
2966   for (i=0;i<n_recvs;i++) {
2967     source_dest = onodes[i];
2968     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2969     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2970     ptr_idxs += olengths_idxs[i];
2971     ptr_vals += olengths_vals[i];
2972   }
2973   for (i=0;i<n_sends;i++) {
2974     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2975     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2976     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2977   }
2978   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2979   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2980 
2981   /* assemble new l2g map */
2982   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2983   ptr_idxs = recv_buffer_idxs;
2984   buf_size_idxs = 0;
2985   for (i=0;i<n_recvs;i++) {
2986     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2987     ptr_idxs += olengths_idxs[i];
2988   }
2989   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2990   ptr_idxs = recv_buffer_idxs;
2991   buf_size_idxs = 0;
2992   for (i=0;i<n_recvs;i++) {
2993     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2994     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2995     ptr_idxs += olengths_idxs[i];
2996   }
2997   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2998   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2999   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3000 
3001   /* infer new local matrix type from received local matrices type */
3002   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3003   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3004   new_local_type_private = MATAIJ_PRIVATE;
3005   new_local_type = MATSEQAIJ;
3006   if (n_recvs) {
3007     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3008     ptr_idxs = recv_buffer_idxs;
3009     for (i=0;i<n_recvs;i++) {
3010       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3011         new_local_type_private = MATAIJ_PRIVATE;
3012         break;
3013       }
3014       ptr_idxs += olengths_idxs[i];
3015     }
3016     switch (new_local_type_private) {
3017       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3018         new_local_type = MATSEQAIJ;
3019         bs = 1;
3020         break;
3021       case MATAIJ_PRIVATE:
3022         new_local_type = MATSEQAIJ;
3023         bs = 1;
3024         break;
3025       case MATBAIJ_PRIVATE:
3026         new_local_type = MATSEQBAIJ;
3027         break;
3028       case MATSBAIJ_PRIVATE:
3029         new_local_type = MATSEQSBAIJ;
3030         break;
3031       default:
3032         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3033         break;
3034     }
3035   }
3036 
3037   /* create MATIS object */
3038   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3039   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
3040   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3041   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
3042   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3043   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3044 
3045   /* set values */
3046   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3047   ptr_vals = recv_buffer_vals;
3048   ptr_idxs = recv_buffer_idxs;
3049   for (i=0;i<n_recvs;i++) {
3050     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3051       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3052       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3053       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3054       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3055       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3056     }
3057     ptr_idxs += olengths_idxs[i];
3058     ptr_vals += olengths_vals[i];
3059   }
3060   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3062   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3063   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3064 
3065   { /* check */
3066     Vec       lvec,rvec;
3067     PetscReal infty_error;
3068 
3069     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3070     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3071     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3072     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3073     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3074     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3075     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3076     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3077     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3078   }
3079 
3080   /* free workspace */
3081   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3082   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3083   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3084   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3085   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3086   if (isdense) {
3087     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3088     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3089   } else {
3090     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3091   }
3092   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3093   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3094   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3095   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3096   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3097   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3098   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3099   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3100   ierr = PetscFree(onodes);CHKERRQ(ierr);
3101   /* get back new mat */
3102   *mat_n = new_mat;
3103   PetscFunctionReturn(0);
3104 }
3105 
3106 #undef __FUNCT__
3107 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3108 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3109 {
3110   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3111   PC_IS                  *pcis = (PC_IS*)pc->data;
3112   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3113   MatNullSpace           CoarseNullSpace=NULL;
3114   ISLocalToGlobalMapping coarse_islg;
3115   IS                     coarse_is;
3116   PetscInt               max_it;
3117   PetscInt               im_active=-1,active_procs=-1;
3118   PC                     pc_temp;
3119   PCType                 coarse_pc_type;
3120   KSPType                coarse_ksp_type;
3121   PetscBool              multilevel_requested,multilevel_allowed;
3122   PetscBool              setsym,issym,isbddc,isnn,coarse_reuse;
3123   MatStructure           matstruct;
3124   PetscErrorCode         ierr;
3125 
3126   PetscFunctionBegin;
3127   /* Assign global numbering to coarse dofs */
3128   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3129     PetscInt ocoarse_size;
3130     ocoarse_size = pcbddc->coarse_size;
3131     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3132     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3133     /* see if we can avoid some work */
3134     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3135       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3136         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3137         coarse_reuse = PETSC_FALSE;
3138       } else { /* we can safely reuse already computed coarse matrix */
3139         coarse_reuse = PETSC_TRUE;
3140       }
3141     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3142       coarse_reuse = PETSC_FALSE;
3143     }
3144   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3145     coarse_reuse = PETSC_TRUE;
3146   }
3147 
3148   /* infer some info from user */
3149   issym = PETSC_FALSE;
3150   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3151   multilevel_allowed = PETSC_FALSE;
3152   multilevel_requested = PETSC_FALSE;
3153   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3154   if (multilevel_requested) {
3155     /* count "active processes" */
3156     im_active = !!(pcis->n);
3157     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3158     if (active_procs/pcbddc->coarsening_ratio < 2) {
3159       multilevel_allowed = PETSC_FALSE;
3160     } else {
3161       multilevel_allowed = PETSC_TRUE;
3162     }
3163   }
3164 
3165   /* set defaults for coarse KSP and PC */
3166   if (multilevel_allowed) {
3167     if (issym) {
3168       coarse_ksp_type = KSPRICHARDSON;
3169     } else {
3170       coarse_ksp_type = KSPCHEBYSHEV;
3171     }
3172     coarse_pc_type = PCBDDC;
3173   } else {
3174     coarse_ksp_type = KSPPREONLY;
3175     coarse_pc_type = PCREDUNDANT;
3176   }
3177 
3178   /* create the coarse KSP object only once with defaults */
3179   if (!pcbddc->coarse_ksp) {
3180     char prefix[256],str_level[3];
3181     size_t len;
3182     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3183     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3184     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3185     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3186     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3187     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3188     /* prefix */
3189     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3190     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3191     if (!pcbddc->current_level) {
3192       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3193       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3194     } else {
3195       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3196       if (pcbddc->current_level>1) len -= 2;
3197       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3198       *(prefix+len)='\0';
3199       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3200       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3201     }
3202     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3203   }
3204   /* allow user customization */
3205   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3206   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3207   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3208 
3209   /* get some info after set from options */
3210   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3211   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3212   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3213   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3214     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3215     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3216     isbddc = PETSC_FALSE;
3217   }
3218 
3219   /* propagate BDDC info to the next level */
3220   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3221   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3222   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3223 
3224   /* creates temporary MATIS object for coarse matrix */
3225   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3226   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3227   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3228   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3229   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3230   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3231   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3232   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3233   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3234 
3235   /* assemble coarse matrix */
3236   if (isbddc || isnn) {
3237     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3238   } else {
3239     if (coarse_reuse) {
3240       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3241       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3242       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3243     } else {
3244       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3245     }
3246   }
3247   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3248 
3249   /* create local to global scatters for coarse problem */
3250   if (pcbddc->new_primal_space) {
3251     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3252     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3253     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3254     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3255     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3256   }
3257   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3258 
3259   /* propagate symmetry info to coarse matrix */
3260   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3261 
3262   /* Compute coarse null space (special handling by BDDC only) */
3263   if (pcbddc->NullSpace) {
3264     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3265     if (isbddc) {
3266       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3267     } else {
3268       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3269     }
3270   }
3271 
3272   /* set operators */
3273   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3274   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3275 
3276   /* additional KSP customization */
3277   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3278   if (max_it < 5) {
3279     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3280   }
3281   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3282 
3283 
3284   /* print some info if requested */
3285   if (pcbddc->dbg_flag) {
3286     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3287     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3288     if (!multilevel_allowed) {
3289       if (multilevel_requested) {
3290         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3291       } else if (pcbddc->max_levels) {
3292         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3293       }
3294     }
3295     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3296     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3297   }
3298 
3299   /* setup coarse ksp */
3300   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3301   if (pcbddc->dbg_flag) {
3302     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3303     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3304     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3305     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3306   }
3307 
3308   /* Check coarse problem if requested */
3309   if (pcbddc->dbg_flag) {
3310     KSP       check_ksp;
3311     KSPType   check_ksp_type;
3312     PC        check_pc;
3313     Vec       check_vec;
3314     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3315     PetscInt  its;
3316     PetscBool ispreonly,compute;
3317 
3318     /* Create ksp object suitable for estimation of extreme eigenvalues */
3319     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3320     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3321     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3322     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3323     if (ispreonly) {
3324       check_ksp_type = KSPPREONLY;
3325       compute = PETSC_FALSE;
3326     } else {
3327       if (issym) check_ksp_type = KSPCG;
3328       else check_ksp_type = KSPGMRES;
3329       compute = PETSC_TRUE;
3330     }
3331     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3332     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3333     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3334     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3335     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3336     /* create random vec */
3337     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3338     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3339     if (CoarseNullSpace) {
3340       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3341     }
3342     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3343     /* solve coarse problem */
3344     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3345     if (CoarseNullSpace) {
3346       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3347     }
3348     /* check coarse problem residual error */
3349     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3350     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3351     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3352     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3353     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3354     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3355     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3356     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3357     /* get eigenvalue estimation if preonly has not been requested */
3358     if (!ispreonly) {
3359       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3360       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3361       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3362     }
3363     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3364     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3365   }
3366   /* free memory */
3367   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3368   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3369   PetscFunctionReturn(0);
3370 }
3371 
3372 #undef __FUNCT__
3373 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3374 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3375 {
3376   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3377   PC_IS*         pcis = (PC_IS*)pc->data;
3378   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3379   PetscInt       i,coarse_size;
3380   PetscInt       *local_primal_indices;
3381   PetscErrorCode ierr;
3382 
3383   PetscFunctionBegin;
3384   /* Compute global number of coarse dofs */
3385   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3386     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3387   }
3388   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3389 
3390   /* check numbering */
3391   if (pcbddc->dbg_flag) {
3392     PetscScalar coarsesum,*array;
3393 
3394     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3395     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3396     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3397     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3398     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3399     for (i=0;i<pcbddc->local_primal_size;i++) {
3400       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3401     }
3402     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3403     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3404     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3405     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3406     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3407     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3408     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3409     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3410     for (i=0;i<pcis->n;i++) {
3411       if (array[i] == 1.0) {
3412         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3413       }
3414     }
3415     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3416     for (i=0;i<pcis->n;i++) {
3417       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3418     }
3419     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3420     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3421     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3422     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3423     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3424     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3425     if (pcbddc->dbg_flag > 1) {
3426       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3427       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3428       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3429       for (i=0;i<pcbddc->local_primal_size;i++) {
3430         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3431       }
3432       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3433     }
3434     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3435   }
3436   /* get back data */
3437   *coarse_size_n = coarse_size;
3438   *local_primal_indices_n = local_primal_indices;
3439   PetscFunctionReturn(0);
3440 }
3441 
3442