xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision f4ddd8eecafc09061839cfbaf2bc8c47641ebc86)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
23 
24   /* Change global null space passed in by the user if change of basis has been requested */
25   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
26     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
27   }
28 
29   /*
30      Setup local correction and local part of coarse basis.
31      Gives back the dense local part of the coarse matrix in column major ordering
32   */
33   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
34 
35   /* Compute total number of coarse nodes and setup coarse solver */
36   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
37 
38   /* free */
39   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "PCBDDCResetCustomization"
45 PetscErrorCode PCBDDCResetCustomization(PC pc)
46 {
47   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
48   PetscInt       i;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
57   for (i=0;i<pcbddc->n_ISForDofs;i++) {
58     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
59   }
60   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
61   pcbddc->n_ISForDofs = 0;
62   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
63   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
115 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
116 {
117   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
118   PC_IS          *pcis = (PC_IS*)pc->data;
119   VecType        impVecType;
120   PetscInt       n_vertices,n_constraints,local_primal_size,n_R,old_size;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   if (!pcbddc->ConstraintMatrix) {
125     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
126   }
127   /* get sizes */
128   ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr);
129   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
130   n_constraints = local_primal_size - n_vertices;
131   n_R = pcis->n-n_vertices;
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   /* local work vectors (try to avoid unneeded work)*/
134   /* R nodes */
135   old_size = -1;
136   if (pcbddc->vec1_R) {
137     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
138   }
139   if (n_R != old_size) {
140     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
146   }
147   /* local primal dofs */
148   old_size = -1;
149   if (pcbddc->vec1_P) {
150     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
151   }
152   if (local_primal_size != old_size) {
153     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
156     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
157   }
158   /* local explicit constraints */
159   old_size = -1;
160   if (pcbddc->vec1_C) {
161     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
162   }
163   if (n_constraints && n_constraints != old_size) {
164     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
167     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCSetUpCorrection"
174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175 {
176   PetscErrorCode         ierr;
177   /* pointers to pcis and pcbddc */
178   PC_IS*                 pcis = (PC_IS*)pc->data;
179   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180   /* submatrices of local problem */
181   Mat                    A_RV,A_VR,A_VV;
182   /* working matrices */
183   Mat                    M1,M2,M3,C_CR;
184   /* working vectors */
185   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186   /* additional working stuff */
187   IS                     is_aux;
188   ISLocalToGlobalMapping BtoNmap;
189   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
190   const PetscScalar      *array,*row_cmat_values;
191   const PetscInt         *row_cmat_indices,*idx_R_local;
192   PetscInt               *vertices,*idx_V_B,*auxindices;
193   PetscInt               n_vertices,n_constraints,size_of_constraint;
194   PetscInt               i,j,n_R,n_D,n_B;
195   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
196   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197   MatType                impMatType;
198   /* some shortcuts to scalars */
199   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
200   /* for debugging purposes */
201   PetscReal              *coarsefunctions_errors,*constraints_errors;
202 
203   PetscFunctionBegin;
204   /* get number of vertices and their local indices */
205   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
206   n_constraints = pcbddc->local_primal_size-n_vertices;
207   /* Set Non-overlapping dimensions */
208   n_B = pcis->n_B; n_D = pcis->n - n_B;
209   n_R = pcis->n-n_vertices;
210 
211   /* Set types for local objects needed by BDDC precondtioner */
212   impMatType = MATSEQDENSE;
213 
214   /* Allocating some extra storage just to be safe */
215   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
216   for (i=0;i<pcis->n;i++) auxindices[i]=i;
217 
218   /* vertices in boundary numbering */
219   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
220   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
221   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
222   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
223   if (i != n_vertices) {
224     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
225   }
226 
227   /* Precompute stuffs needed for preprocessing and application of BDDC*/
228   if (n_constraints) {
229     /* see if we can save some allocations */
230     if (pcbddc->local_auxmat2) {
231       PetscInt on_R,on_constraints;
232       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
233       if (on_R != n_R || on_constraints != n_constraints) {
234         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
235         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
236       }
237     }
238     /* work vectors */
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
240     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
241     /* auxiliary matrices */
242     if (!pcbddc->local_auxmat2) {
243       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
244       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
245       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
246       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
247     }
248 
249     /* Extract constraints on R nodes: C_{CR}  */
250     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
251     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
252     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
253 
254     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
255     for (i=0;i<n_constraints;i++) {
256       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
257       /* Get row of constraint matrix in R numbering */
258       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
260       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
262       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
263       /* Solve for row of constraint matrix in R numbering */
264       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
265       /* Set values in local_auxmat2 */
266       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
268       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269     }
270     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
273 
274     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
275     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
276     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
277     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
278     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
279     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
280     ierr = MatSetUp(M1);CHKERRQ(ierr);
281     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
282     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
283     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
284     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
285     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
286     ierr = MatDestroy(&M2);CHKERRQ(ierr);
287     ierr = MatDestroy(&M3);CHKERRQ(ierr);
288     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
289     if (!pcbddc->local_auxmat1) {
290       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     } else {
292       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     }
294   }
295 
296   /* Get submatrices from subdomain matrix */
297   if (n_vertices) {
298     PetscInt ibs,mbs;
299     PetscBool issbaij;
300     Mat newmat;
301 
302     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
303     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
304     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
305     if (ibs != mbs) { /* need to convert to SEQAIJ */
306       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
310       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
311     } else {
312       /* this is safe */
313       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
314       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
315       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
316         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
317         /* which of the two approaches is faster? */
318         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
320         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
322         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
323       } else {
324         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
325         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
326       }
327     }
328     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
329     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
330     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
331   }
332 
333   /* Matrix of coarse basis functions (local) */
334   if (pcbddc->coarse_phi_B) {
335     PetscInt on_B,on_primal;
336     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
337     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
338       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
339       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
340     }
341   }
342   if (pcbddc->coarse_phi_D) {
343     PetscInt on_D,on_primal;
344     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
345     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
346       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
347       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
348     }
349   }
350   if (!pcbddc->coarse_phi_B) {
351     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
352     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
353     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
354     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
355   }
356   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
357     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
358     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
359     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
360     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
361   }
362 
363   if (pcbddc->dbg_flag) {
364     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
365     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
366     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
367   }
368   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
369   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
370 
371   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
372 
373   /* vertices */
374   for (i=0;i<n_vertices;i++) {
375     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
376     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
377     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
378     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
379     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
380     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
381     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
382     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
384     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
385     if (n_constraints) {
386       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
387       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
388       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
389     }
390     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
391     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
392 
393     /* Set values in coarse basis function and subdomain part of coarse_mat */
394     /* coarse basis functions */
395     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
396     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
400     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
402     if (pcbddc->switch_static || pcbddc->dbg_flag) {
403       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
405       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
407       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408     }
409     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
410     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
412     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     if (n_constraints) {
414       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
417     }
418 
419     /* check */
420     if (pcbddc->dbg_flag) {
421       /* assemble subdomain vector on local nodes */
422       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
423       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
428       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
429       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
430       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
431       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
433       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
434       if (n_constraints) {
435         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
436         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
437         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
438       }
439       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
442       /* check saddle point solution */
443       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
446       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
447       /* shift by the identity matrix */
448       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
452     }
453   }
454 
455   /* constraints */
456   for (i=0;i<n_constraints;i++) {
457     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
458     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
459     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
460     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
461     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
462     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
463     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
464     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
465     if (n_vertices) {
466       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
467     }
468     /* Set values in coarse basis function and subdomain part of coarse_mat */
469     /* coarse basis functions */
470     j = i+n_vertices; /* don't touch this! */
471     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
472     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
475     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
476     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
477     if (pcbddc->switch_static || pcbddc->dbg_flag) {
478       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
481       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
482       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
483     }
484     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
485     if (n_vertices) {
486       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
487       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
488       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
489     }
490     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
491     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
492     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
493 
494     if (pcbddc->dbg_flag) {
495       /* assemble subdomain vector on nodes */
496       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
497       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
498       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
499       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
500       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
501       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
502       /* assemble subdomain vector of lagrange multipliers */
503       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
504       if (n_vertices) {
505         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
506         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
507         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
508       }
509       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
510       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
511       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
512       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
513       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
514       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
515       /* check saddle point solution */
516       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
517       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
518       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
519       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
520       /* shift by the identity matrix */
521       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
522       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
523       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
524       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
525     }
526   }
527   /* call assembling routines for local coarse basis */
528   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   if (pcbddc->switch_static || pcbddc->dbg_flag) {
531     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533   }
534 
535   /* compute other basis functions for non-symmetric problems */
536   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
537   if (!setsym || (setsym && !issym)) {
538     if (!pcbddc->coarse_psi_B) {
539       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
540       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
541       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
542       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
543     }
544     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
545       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
546       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
547       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
548       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
549     }
550     for (i=0;i<pcbddc->local_primal_size;i++) {
551       if (n_constraints) {
552         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
553         for (j=0;j<n_constraints;j++) {
554           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
555         }
556         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
557         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
558       }
559       if (i<n_vertices) {
560         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
561         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
562         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
563         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
564         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
565         if (n_constraints) {
566           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
567         }
568       } else {
569         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
570       }
571       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
572       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
573       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
574       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
575       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
576       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
577       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
578       if (i<n_vertices) {
579         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
580       }
581       if (pcbddc->switch_static || pcbddc->dbg_flag) {
582         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
583         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
584         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
585         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
586         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
587       }
588 
589       if (pcbddc->dbg_flag) {
590         /* assemble subdomain vector on nodes */
591         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
592         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
593         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
594         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
595         if (i<n_vertices) {
596           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
597         }
598         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
599         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
600         /* assemble subdomain vector of lagrange multipliers */
601         for (j=0;j<pcbddc->local_primal_size;j++) {
602           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
603         }
604         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
605         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
606         /* check saddle point solution */
607         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
608         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
609         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
610         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
611         /* shift by the identity matrix */
612         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
613         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
614         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
615         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
616       }
617     }
618     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     if (pcbddc->switch_static || pcbddc->dbg_flag) {
621       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
623     }
624   }
625   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
626   /* Checking coarse_sub_mat and coarse basis functios */
627   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
628   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
629   if (pcbddc->dbg_flag) {
630     Mat         coarse_sub_mat;
631     Mat         AUXMAT,TM1,TM2,TM3,TM4;
632     Mat         coarse_phi_D,coarse_phi_B;
633     Mat         coarse_psi_D,coarse_psi_B;
634     Mat         A_II,A_BB,A_IB,A_BI;
635     MatType     checkmattype=MATSEQAIJ;
636     PetscReal   real_value;
637 
638     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
639     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
640     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
641     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
642     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
643     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
644     if (pcbddc->coarse_psi_B) {
645       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
646       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
647     }
648     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
649 
650     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
651     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
652     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
653     if (pcbddc->coarse_psi_B) {
654       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
655       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
656       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
657       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
658       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
659       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
660       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
661       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
662       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
663       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
664       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
665       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
666     } else {
667       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
668       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
669       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
670       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
671       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
672       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
673       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
674       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
675     }
676     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
677     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
679     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
680     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
681     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
684     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
685     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
686     for (i=0;i<pcbddc->local_primal_size;i++) {
687       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
688     }
689     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
690     for (i=0;i<pcbddc->local_primal_size;i++) {
691       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
692     }
693     if (pcbddc->coarse_psi_B) {
694       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
695       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
696         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
697       }
698       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
699       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
700         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
701       }
702     }
703     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
706     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
707     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
710     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
711     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
712     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
713     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
714     if (pcbddc->coarse_psi_B) {
715       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
716       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
717     }
718     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
719     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
720     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
721     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
722   }
723   /* free memory */
724   if (n_vertices) {
725     ierr = PetscFree(vertices);CHKERRQ(ierr);
726     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
727     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
731   }
732   if (n_constraints) {
733     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
734     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
735     ierr = MatDestroy(&M1);CHKERRQ(ierr);
736     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
737   }
738   ierr = PetscFree(auxindices);CHKERRQ(ierr);
739   /* get back data */
740   *coarse_submat_vals_n = coarse_submat_vals;
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
746 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
747 {
748   PC_IS*            pcis = (PC_IS*)(pc->data);
749   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
750   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
751   PetscBool         issbaij,isbaij;
752   /* manage repeated solves */
753   MatReuse          reuse;
754   MatStructure      matstruct;
755   PetscErrorCode    ierr;
756 
757   PetscFunctionBegin;
758   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
759     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
760   }
761   /* get mat flags */
762   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
763   reuse = MAT_INITIAL_MATRIX;
764   if (pc->setupcalled) {
765     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
766     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
767     if (matstruct == SAME_NONZERO_PATTERN) {
768       reuse = MAT_REUSE_MATRIX;
769     } else {
770       reuse = MAT_INITIAL_MATRIX;
771     }
772   }
773   if (reuse == MAT_INITIAL_MATRIX) {
774     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
775     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
776     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
777     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
778     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
779   }
780 
781   /* transform local matrices if needed */
782   if (pcbddc->use_change_of_basis) {
783     Mat         change_mat_all;
784     PetscScalar *row_cmat_values;
785     PetscInt    *row_cmat_indices;
786     PetscInt    *nnz,*is_indices,*temp_indices;
787     PetscInt    i,j,k,n_D,n_B;
788 
789     /* Get Non-overlapping dimensions */
790     n_B = pcis->n_B;
791     n_D = pcis->n-n_B;
792 
793     /* compute nonzero structure of change of basis on all local nodes */
794     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
795     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
796     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
797     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
798     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
799     k=1;
800     for (i=0;i<n_B;i++) {
801       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
802       nnz[is_indices[i]]=j;
803       if (k < j) k = j;
804       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
805     }
806     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
807     /* assemble change of basis matrix on the whole set of local dofs */
808     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
809     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
810     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
811     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
812     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
813     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
814     for (i=0;i<n_D;i++) {
815       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
816     }
817     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
818     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
819     for (i=0;i<n_B;i++) {
820       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
821       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
822       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
823       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
824     }
825     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
826     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
827     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
828     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
829     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
830     if (!issbaij && !isbaij) {
831       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
832     } else {
833       Mat work_mat;
834       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
835       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
836       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
837     }
838     /*
839     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
840     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
841     */
842     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
843     ierr = PetscFree(nnz);CHKERRQ(ierr);
844     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
845   } else {
846     /* without change of basis, the local matrix is unchanged */
847     if (!pcbddc->local_mat) {
848       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
849       pcbddc->local_mat = matis->A;
850     }
851   }
852 
853   /* get submatrices */
854   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
855   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
856   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
857   if (!issbaij) {
858     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
859     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
860   } else {
861     Mat newmat;
862     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
863     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
864     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
865     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
872 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
873 {
874   PC_IS*         pcis = (PC_IS*)(pc->data);
875   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
876   IS             is_aux1,is_aux2;
877   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
878   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
879   PetscInt       vbs,bs;
880   PetscBT        bitmask;
881   PetscErrorCode ierr;
882 
883   PetscFunctionBegin;
884   /* No need to setup local scatters if primal space is unchanged */
885   if (!pcbddc->new_primal_space) {
886     PetscFunctionReturn(0);
887   }
888   /* destroy old objects */
889   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
890   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
891   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
892   /* Set Non-overlapping dimensions */
893   n_B = pcis->n_B; n_D = pcis->n - n_B;
894   /* get vertex indices from constraint matrix */
895   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
896 
897   /* create auxiliary bitmask */
898   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
899   for (i=0;i<n_vertices;i++) {
900     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
901   }
902 
903   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
904   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
905   for (i=0, n_R=0; i<pcis->n; i++) {
906     if (!PetscBTLookup(bitmask,i)) {
907       idx_R_local[n_R] = i;
908       n_R++;
909     }
910   }
911 
912   /* Block code */
913   vbs = 1;
914   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
915   if (bs>1 && !(n_vertices%bs)) {
916     PetscBool is_blocked = PETSC_TRUE;
917     PetscInt  *vary;
918     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
919     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
920     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
921     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
922     for (i=0; i<n_vertices; i++) {
923       if (vary[i]!=0 && vary[i]!=bs) {
924         is_blocked = PETSC_FALSE;
925         break;
926       }
927     }
928     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
929       vbs = bs;
930       for (i=0;i<n_R/vbs;i++) {
931         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
932       }
933     }
934     ierr = PetscFree(vary);CHKERRQ(ierr);
935   }
936   ierr = PetscFree(vertices);CHKERRQ(ierr);
937   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
938   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
939 
940   /* print some info if requested */
941   if (pcbddc->dbg_flag) {
942     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
944     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
947     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
948     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
949   }
950 
951   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
952   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
953   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
954   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
955   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
956   for (i=0; i<n_D; i++) {
957     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
958   }
959   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
960   for (i=0, j=0; i<n_R; i++) {
961     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
962       aux_array1[j++] = i;
963     }
964   }
965   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
966   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
967   for (i=0, j=0; i<n_B; i++) {
968     if (!PetscBTLookup(bitmask,is_indices[i])) {
969       aux_array2[j++] = i;
970     }
971   }
972   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
973   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
974   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
975   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
976   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
977 
978   if (pcbddc->switch_static || pcbddc->dbg_flag) {
979     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
980     for (i=0, j=0; i<n_R; i++) {
981       if (PetscBTLookup(bitmask,idx_R_local[i])) {
982         aux_array1[j++] = i;
983       }
984     }
985     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
986     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
987     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
988   }
989   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
990   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
997 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
998 {
999   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1000   PC_IS          *pcis = (PC_IS*)pc->data;
1001   PC             pc_temp;
1002   Mat            A_RR;
1003   Vec            vec1,vec2,vec3;
1004   MatStructure   matstruct;
1005   MatReuse       reuse;
1006   PetscScalar    m_one = -1.0;
1007   PetscReal      value;
1008   PetscInt       n_D,n_R,ibs,mbs;
1009   PetscBool      use_exact,use_exact_reduced,issbaij;
1010   PetscErrorCode ierr;
1011   /* prefixes stuff */
1012   char           dir_prefix[256],neu_prefix[256],str_level[3];
1013   size_t         len;
1014 
1015   PetscFunctionBegin;
1016   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1017 
1018   /* compute prefixes */
1019   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1020   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1021   if (!pcbddc->current_level) {
1022     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1023     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1024     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1025     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1026   } else {
1027     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1028     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1029     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1030     len -= 15; /* remove "pc_bddc_coarse_" */
1031     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1032     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1033     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1034     *(dir_prefix+len)='\0';
1035     *(neu_prefix+len)='\0';
1036     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1037     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1038     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1039     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1040   }
1041 
1042   /* DIRICHLET PROBLEM */
1043   /* Matrix for Dirichlet problem is pcis->A_II */
1044   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1045   if (!pcbddc->ksp_D) { /* create object if not yet build */
1046     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1047     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1048     /* default */
1049     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1050     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1051     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1052     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1053     if (issbaij) {
1054       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1055     } else {
1056       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1057     }
1058     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1059   }
1060   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1061   /* Allow user's customization */
1062   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1063   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1064   if (!n_D) {
1065     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1066     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1067   }
1068   /* Set Up KSP for Dirichlet problem of BDDC */
1069   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1070   /* set ksp_D into pcis data */
1071   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1072   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1073   pcis->ksp_D = pcbddc->ksp_D;
1074 
1075   /* NEUMANN PROBLEM */
1076   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1077   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1078   if (pcbddc->ksp_R) { /* already created ksp */
1079     PetscInt nn_R;
1080     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1081     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1082     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1083     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1084       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1085       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1086       reuse = MAT_INITIAL_MATRIX;
1087     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1088       if (pcbddc->new_primal_space) { /* we are not sure the matrix will have the same nonzero pattern */
1089         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1090         reuse = MAT_INITIAL_MATRIX;
1091       } else { /* safe to reuse the matrix */
1092         reuse = MAT_REUSE_MATRIX;
1093       }
1094     }
1095     /* last check */
1096     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1097       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1098       reuse = MAT_INITIAL_MATRIX;
1099     }
1100   } else { /* first time, so we need to create the matrix */
1101     reuse = MAT_INITIAL_MATRIX;
1102   }
1103   /* extract A_RR */
1104   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1105   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1106   if (ibs != mbs) {
1107     Mat newmat;
1108     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1109     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1110     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1111   } else {
1112     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1113   }
1114   if (!pcbddc->ksp_R) { /* create object if not present */
1115     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1116     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1117     /* default */
1118     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1119     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1120     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1121     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1122     if (issbaij) {
1123       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1124     } else {
1125       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1126     }
1127     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1128   }
1129   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1130   /* Allow user's customization */
1131   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1132   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1133   if (!n_R) {
1134     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1135     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1136   }
1137   /* Set Up KSP for Neumann problem of BDDC */
1138   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1139 
1140   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1141 
1142   /* Dirichlet */
1143   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1144   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1145   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1146   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1147   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1148   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1149   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1150   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1151   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1152   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1153   /* need to be adapted? */
1154   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1155   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1156   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1157   /* print info */
1158   if (pcbddc->dbg_flag) {
1159     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1160     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1161     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1162     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1163     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1164   }
1165   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1166     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1167   }
1168 
1169   /* Neumann */
1170   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1171   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1172   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1173   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1174   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1175   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1176   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1177   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1178   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1179   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1180   /* need to be adapted? */
1181   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1182   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1183   /* print info */
1184   if (pcbddc->dbg_flag) {
1185     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1186     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1187   }
1188   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1189     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1190   }
1191   /* free Neumann problem's matrix */
1192   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1198 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1199 {
1200   PetscErrorCode ierr;
1201   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1202 
1203   PetscFunctionBegin;
1204   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1205   if (pcbddc->local_auxmat1) {
1206     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1207     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1214 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1215 {
1216   PetscErrorCode ierr;
1217   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1218   PC_IS*            pcis = (PC_IS*)  (pc->data);
1219   const PetscScalar zero = 0.0;
1220 
1221   PetscFunctionBegin;
1222   /* Application of PHI^T (or PSI^T)  */
1223   if (pcbddc->coarse_psi_B) {
1224     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1225     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1226   } else {
1227     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1228     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1229   }
1230   /* Scatter data of coarse_rhs */
1231   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1232   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233 
1234   /* Local solution on R nodes */
1235   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1236   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   if (pcbddc->switch_static) {
1239     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1240     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1241   }
1242   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1243   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1244   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246   if (pcbddc->switch_static) {
1247     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249   }
1250 
1251   /* Coarse solution */
1252   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1254     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1255   }
1256   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1257   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258 
1259   /* Sum contributions from two levels */
1260   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1261   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1267 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1268 {
1269   PetscErrorCode ierr;
1270   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1271 
1272   PetscFunctionBegin;
1273   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1279 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1280 {
1281   PetscErrorCode ierr;
1282   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1283 
1284   PetscFunctionBegin;
1285   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /* uncomment for testing purposes */
1290 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1291 #undef __FUNCT__
1292 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1293 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1294 {
1295   PetscErrorCode    ierr;
1296   PC_IS*            pcis = (PC_IS*)(pc->data);
1297   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1298   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1299   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1300   MatType           impMatType=MATSEQAIJ;
1301   /* one and zero */
1302   PetscScalar       one=1.0,zero=0.0;
1303   /* space to store constraints and their local indices */
1304   PetscScalar       *temp_quadrature_constraint;
1305   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1306   /* iterators */
1307   PetscInt          i,j,k,total_counts,temp_start_ptr;
1308   /* stuff to store connected components stored in pcbddc->mat_graph */
1309   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1310   PetscInt          n_ISForFaces,n_ISForEdges;
1311   /* near null space stuff */
1312   MatNullSpace      nearnullsp;
1313   const Vec         *nearnullvecs;
1314   Vec               *localnearnullsp;
1315   PetscBool         nnsp_has_cnst;
1316   PetscInt          nnsp_size;
1317   PetscScalar       *array;
1318   /* BLAS integers */
1319   PetscBLASInt      lwork,lierr;
1320   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1321   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1322   /* LAPACK working arrays for SVD or POD */
1323   PetscBool         skip_lapack;
1324   PetscScalar       *work;
1325   PetscReal         *singular_vals;
1326 #if defined(PETSC_USE_COMPLEX)
1327   PetscReal         *rwork;
1328 #endif
1329 #if defined(PETSC_MISSING_LAPACK_GESVD)
1330   PetscBLASInt      Blas_one_2=1;
1331   PetscScalar       *temp_basis,*correlation_mat;
1332 #else
1333   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1334   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1335 #endif
1336   /* change of basis */
1337   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1338   PetscBool         boolforchange,qr_needed;
1339   PetscBT           touched,change_basis,qr_needed_idx;
1340   /* auxiliary stuff */
1341   PetscInt          *nnz,*is_indices,*local_to_B;
1342   /* some quantities */
1343   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1344   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1345 
1346 
1347   PetscFunctionBegin;
1348   /* Destroy Mat objects computed previously */
1349   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1350   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1351   /* Get index sets for faces, edges and vertices from graph */
1352   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1353     pcbddc->use_vertices = PETSC_TRUE;
1354   }
1355   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1356   /* HACK: provide functions to set change of basis */
1357   if (!ISForVertices && pcbddc->NullSpace) {
1358     pcbddc->use_change_of_basis = PETSC_TRUE;
1359     pcbddc->use_change_on_faces = PETSC_FALSE;
1360   }
1361   /* print some info */
1362   if (pcbddc->dbg_flag) {
1363     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1364     i = 0;
1365     if (ISForVertices) {
1366       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1367     }
1368     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1369     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1370     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1371     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1372   }
1373   /* check if near null space is attached to global mat */
1374   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1375   if (nearnullsp) {
1376     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1377     /* remove any stored info */
1378     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1379     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1380     /* store information for BDDC solver reuse */
1381     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1382     pcbddc->onearnullspace = nearnullsp;
1383     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1384     for (i=0;i<nnsp_size;i++) {
1385       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1386     }
1387   } else { /* if near null space is not provided BDDC uses constants by default */
1388     nnsp_size = 0;
1389     nnsp_has_cnst = PETSC_TRUE;
1390   }
1391   /* get max number of constraints on a single cc */
1392   max_constraints = nnsp_size;
1393   if (nnsp_has_cnst) max_constraints++;
1394 
1395   /*
1396        Evaluate maximum storage size needed by the procedure
1397        - temp_indices will contain start index of each constraint stored as follows
1398        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1399        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1400        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1401                                                                                                                                                          */
1402   total_counts = n_ISForFaces+n_ISForEdges;
1403   total_counts *= max_constraints;
1404   n_vertices = 0;
1405   if (ISForVertices) {
1406     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1407   }
1408   total_counts += n_vertices;
1409   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1410   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1411   total_counts = 0;
1412   max_size_of_constraint = 0;
1413   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1414     if (i<n_ISForEdges) {
1415       used_IS = &ISForEdges[i];
1416     } else {
1417       used_IS = &ISForFaces[i-n_ISForEdges];
1418     }
1419     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1420     total_counts += j;
1421     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1422   }
1423   total_counts *= max_constraints;
1424   total_counts += n_vertices;
1425   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1426   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1427   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1428   /* local to boundary numbering */
1429   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1430   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1431   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1432   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1433   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1434   /* get local part of global near null space vectors */
1435   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1436   for (k=0;k<nnsp_size;k++) {
1437     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1438     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1439     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1440   }
1441 
1442   /* whether or not to skip lapack calls */
1443   skip_lapack = PETSC_TRUE;
1444   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1445 
1446   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1447   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1448     PetscScalar temp_work;
1449 #if defined(PETSC_MISSING_LAPACK_GESVD)
1450     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1451     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1452     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1453     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1454 #if defined(PETSC_USE_COMPLEX)
1455     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1456 #endif
1457     /* now we evaluate the optimal workspace using query with lwork=-1 */
1458     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1459     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1460     lwork = -1;
1461     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1462 #if !defined(PETSC_USE_COMPLEX)
1463     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1464 #else
1465     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1466 #endif
1467     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1468     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1469 #else /* on missing GESVD */
1470     /* SVD */
1471     PetscInt max_n,min_n;
1472     max_n = max_size_of_constraint;
1473     min_n = max_constraints;
1474     if (max_size_of_constraint < max_constraints) {
1475       min_n = max_size_of_constraint;
1476       max_n = max_constraints;
1477     }
1478     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1479 #if defined(PETSC_USE_COMPLEX)
1480     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1481 #endif
1482     /* now we evaluate the optimal workspace using query with lwork=-1 */
1483     lwork = -1;
1484     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1485     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1486     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1487     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1488 #if !defined(PETSC_USE_COMPLEX)
1489     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1490 #else
1491     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1492 #endif
1493     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1494     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1495 #endif /* on missing GESVD */
1496     /* Allocate optimal workspace */
1497     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1498     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1499   }
1500   /* Now we can loop on constraining sets */
1501   total_counts = 0;
1502   temp_indices[0] = 0;
1503   /* vertices */
1504   if (ISForVertices) {
1505     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1506     if (nnsp_has_cnst) { /* consider all vertices */
1507       for (i=0;i<n_vertices;i++) {
1508         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1509         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1510         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1511         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1512         total_counts++;
1513       }
1514     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1515       PetscBool used_vertex;
1516       for (i=0;i<n_vertices;i++) {
1517         used_vertex = PETSC_FALSE;
1518         k = 0;
1519         while (!used_vertex && k<nnsp_size) {
1520           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1521           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1522             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1523             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1524             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1525             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1526             total_counts++;
1527             used_vertex = PETSC_TRUE;
1528           }
1529           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1530           k++;
1531         }
1532       }
1533     }
1534     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1535     n_vertices = total_counts;
1536   }
1537 
1538   /* edges and faces */
1539   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1540     if (i<n_ISForEdges) {
1541       used_IS = &ISForEdges[i];
1542       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1543     } else {
1544       used_IS = &ISForFaces[i-n_ISForEdges];
1545       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1546     }
1547     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1548     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1549     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1550     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1551     /* change of basis should not be performed on local periodic nodes */
1552     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1553     if (nnsp_has_cnst) {
1554       PetscScalar quad_value;
1555       temp_constraints++;
1556       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1557       for (j=0;j<size_of_constraint;j++) {
1558         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1559         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1560         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1561       }
1562       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1563       total_counts++;
1564     }
1565     for (k=0;k<nnsp_size;k++) {
1566       PetscReal real_value;
1567       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1568       for (j=0;j<size_of_constraint;j++) {
1569         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1570         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1571         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1572       }
1573       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1574       /* check if array is null on the connected component */
1575       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1576       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1577       if (real_value > 0.0) { /* keep indices and values */
1578         temp_constraints++;
1579         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1580         total_counts++;
1581       }
1582     }
1583     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1584     valid_constraints = temp_constraints;
1585     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1586     if (!pcbddc->use_nnsp_true && temp_constraints) {
1587       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1588 
1589 #if defined(PETSC_MISSING_LAPACK_GESVD)
1590       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1591          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1592          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1593             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1594             from that computed using LAPACKgesvd
1595          -> This is due to a different computation of eigenvectors in LAPACKheev
1596          -> The quality of the POD-computed basis will be the same */
1597       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1598       /* Store upper triangular part of correlation matrix */
1599       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1600       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1601       for (j=0;j<temp_constraints;j++) {
1602         for (k=0;k<j+1;k++) {
1603           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1604         }
1605       }
1606       /* compute eigenvalues and eigenvectors of correlation matrix */
1607       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1608       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1609 #if !defined(PETSC_USE_COMPLEX)
1610       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1611 #else
1612       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1613 #endif
1614       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1615       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1616       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1617       j=0;
1618       while (j < temp_constraints && singular_vals[j] < tol) j++;
1619       total_counts=total_counts-j;
1620       valid_constraints = temp_constraints-j;
1621       /* scale and copy POD basis into used quadrature memory */
1622       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1623       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1624       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1625       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1626       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1627       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1628       if (j<temp_constraints) {
1629         PetscInt ii;
1630         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1631         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1632         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1633         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1634         for (k=0;k<temp_constraints-j;k++) {
1635           for (ii=0;ii<size_of_constraint;ii++) {
1636             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1637           }
1638         }
1639       }
1640 #else  /* on missing GESVD */
1641       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1642       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1643       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1644       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1645 #if !defined(PETSC_USE_COMPLEX)
1646       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1647 #else
1648       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1649 #endif
1650       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1651       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1652       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1653       k = temp_constraints;
1654       if (k > size_of_constraint) k = size_of_constraint;
1655       j = 0;
1656       while (j < k && singular_vals[k-j-1] < tol) j++;
1657       total_counts = total_counts-temp_constraints+k-j;
1658       valid_constraints = k-j;
1659 #endif /* on missing GESVD */
1660     }
1661     /* setting change_of_basis flag is safe now */
1662     if (boolforchange) {
1663       for (j=0;j<valid_constraints;j++) {
1664         PetscBTSet(change_basis,total_counts-j-1);
1665       }
1666     }
1667   }
1668   /* free index sets of faces, edges and vertices */
1669   for (i=0;i<n_ISForFaces;i++) {
1670     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1671   }
1672   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1673   for (i=0;i<n_ISForEdges;i++) {
1674     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1675   }
1676   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1677   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1678 
1679   /* free workspace */
1680   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1681     ierr = PetscFree(work);CHKERRQ(ierr);
1682 #if defined(PETSC_USE_COMPLEX)
1683     ierr = PetscFree(rwork);CHKERRQ(ierr);
1684 #endif
1685     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1686 #if defined(PETSC_MISSING_LAPACK_GESVD)
1687     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1688     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1689 #endif
1690   }
1691   for (k=0;k<nnsp_size;k++) {
1692     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1693   }
1694   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1695 
1696   /* set quantities in pcbddc data structure */
1697   /* n_vertices defines the number of subdomain corners in the primal space */
1698   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1699   pcbddc->local_primal_size = total_counts;
1700   pcbddc->n_vertices = n_vertices;
1701   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1702 
1703   /* Create constraint matrix */
1704   /* The constraint matrix is used to compute the l2g map of primal dofs */
1705   /* so we need to set it up properly either with or without change of basis */
1706   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1707   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1708   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1709   /* array to compute a local numbering of constraints : vertices first then constraints */
1710   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1711   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1712   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1713   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1714   /* auxiliary stuff for basis change */
1715   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1716   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1717 
1718   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1719   total_primal_vertices=0;
1720   for (i=0;i<pcbddc->local_primal_size;i++) {
1721     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1722     if (size_of_constraint == 1) {
1723       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1724       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1725       aux_primal_minloc[total_primal_vertices]=0;
1726       total_primal_vertices++;
1727     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1728       PetscInt min_loc,min_index;
1729       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1730       /* find first untouched local node */
1731       k = 0;
1732       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1733       min_index = global_indices[k];
1734       min_loc = k;
1735       /* search the minimum among global nodes already untouched on the cc */
1736       for (k=1;k<size_of_constraint;k++) {
1737         /* there can be more than one constraint on a single connected component */
1738         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1739           min_index = global_indices[k];
1740           min_loc = k;
1741         }
1742       }
1743       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1744       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1745       aux_primal_minloc[total_primal_vertices]=min_loc;
1746       total_primal_vertices++;
1747     }
1748   }
1749   /* determine if a QR strategy is needed for change of basis */
1750   qr_needed = PETSC_FALSE;
1751   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1752   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1753     if (PetscBTLookup(change_basis,i)) {
1754       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1755       j = 0;
1756       for (k=0;k<size_of_constraint;k++) {
1757         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1758           j++;
1759         }
1760       }
1761       /* found more than one primal dof on the cc */
1762       if (j > 1) {
1763         PetscBTSet(qr_needed_idx,i);
1764         qr_needed = PETSC_TRUE;
1765       }
1766     }
1767   }
1768   /* free workspace */
1769   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1770 
1771   /* permute indices in order to have a sorted set of vertices */
1772   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1773 
1774   /* nonzero structure of constraint matrix */
1775   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1776   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1777   j=total_primal_vertices;
1778   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1779     if (!PetscBTLookup(change_basis,i)) {
1780       nnz[j]=temp_indices[i+1]-temp_indices[i];
1781       j++;
1782     }
1783   }
1784   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1785   ierr = PetscFree(nnz);CHKERRQ(ierr);
1786   /* set values in constraint matrix */
1787   for (i=0;i<total_primal_vertices;i++) {
1788     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1789   }
1790   total_counts = total_primal_vertices;
1791   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1792     if (!PetscBTLookup(change_basis,i)) {
1793       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1794       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1795       total_counts++;
1796     }
1797   }
1798   /* assembling */
1799   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1800   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1801   /*
1802   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1803   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1804   */
1805   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1806   if (pcbddc->use_change_of_basis) {
1807     /* dual and primal dofs on a single cc */
1808     PetscInt     dual_dofs,primal_dofs;
1809     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1810     PetscInt     primal_counter;
1811     /* working stuff for GEQRF */
1812     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1813     PetscBLASInt lqr_work;
1814     /* working stuff for UNGQR */
1815     PetscScalar  *gqr_work,lgqr_work_t;
1816     PetscBLASInt lgqr_work;
1817     /* working stuff for TRTRS */
1818     PetscScalar  *trs_rhs;
1819     PetscBLASInt Blas_NRHS;
1820     /* pointers for values insertion into change of basis matrix */
1821     PetscInt     *start_rows,*start_cols;
1822     PetscScalar  *start_vals;
1823     /* working stuff for values insertion */
1824     PetscBT      is_primal;
1825 
1826     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1827     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1828     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1829     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1830     /* work arrays */
1831     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1832     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1833     /* nonzeros per row */
1834     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1835       if (PetscBTLookup(change_basis,i)) {
1836         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1837         if (PetscBTLookup(qr_needed_idx,i)) {
1838           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1839         } else {
1840           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1841           /* get local primal index on the cc */
1842           j = 0;
1843           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1844           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1845         }
1846       }
1847     }
1848     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1849     ierr = PetscFree(nnz);CHKERRQ(ierr);
1850     /* Set initial identity in the matrix */
1851     for (i=0;i<pcis->n_B;i++) {
1852       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1853     }
1854 
1855     if (pcbddc->dbg_flag) {
1856       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1857       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1858     }
1859 
1860 
1861     /* Now we loop on the constraints which need a change of basis */
1862     /*
1863        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1864        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1865 
1866        Basic blocks of change of basis matrix T computed
1867 
1868           - Using the following block transformation if there is only a primal dof on the cc
1869             (in the example, primal dof is the last one of the edge in LOCAL ordering
1870              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1871             | 1        0   ...        0     1 |
1872             | 0        1   ...        0     1 |
1873             |              ...                |
1874             | 0        ...            1     1 |
1875             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1876 
1877           - via QR decomposition of constraints otherwise
1878     */
1879     if (qr_needed) {
1880       /* space to store Q */
1881       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1882       /* first we issue queries for optimal work */
1883       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1884       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1885       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1886       lqr_work = -1;
1887       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1888       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1889       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1890       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1891       lgqr_work = -1;
1892       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1893       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1894       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1895       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1896       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1897       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1898       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1899       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1900       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1901       /* array to store scaling factors for reflectors */
1902       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1903       /* array to store rhs and solution of triangular solver */
1904       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1905       /* allocating workspace for check */
1906       if (pcbddc->dbg_flag) {
1907         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1908       }
1909     }
1910     /* array to store whether a node is primal or not */
1911     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1912     for (i=0;i<total_primal_vertices;i++) {
1913       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1914     }
1915 
1916     /* loop on constraints and see whether or not they need a change of basis and compute it */
1917     /* -> using implicit ordering contained in temp_indices data */
1918     total_counts = pcbddc->n_vertices;
1919     primal_counter = total_counts;
1920     while (total_counts<pcbddc->local_primal_size) {
1921       primal_dofs = 1;
1922       if (PetscBTLookup(change_basis,total_counts)) {
1923         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1924         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1925           primal_dofs++;
1926         }
1927         /* get constraint info */
1928         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1929         dual_dofs = size_of_constraint-primal_dofs;
1930 
1931         if (pcbddc->dbg_flag) {
1932           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1933         }
1934 
1935         if (primal_dofs > 1) { /* QR */
1936 
1937           /* copy quadrature constraints for change of basis check */
1938           if (pcbddc->dbg_flag) {
1939             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1940           }
1941           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1942           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1943 
1944           /* compute QR decomposition of constraints */
1945           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1946           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1947           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1948           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1949           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1950           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1951           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1952 
1953           /* explictly compute R^-T */
1954           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1955           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1956           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1957           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1958           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1959           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1960           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1961           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1962           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1963           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1964 
1965           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1966           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1967           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1968           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1969           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1970           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1971           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1972           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1973           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1974 
1975           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1976              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1977              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1978           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1979           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1980           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1981           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1982           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1983           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1984           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1985           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1986           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1987           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1988 
1989           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1990           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1991           /* insert cols for primal dofs */
1992           for (j=0;j<primal_dofs;j++) {
1993             start_vals = &qr_basis[j*size_of_constraint];
1994             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1995             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1996           }
1997           /* insert cols for dual dofs */
1998           for (j=0,k=0;j<dual_dofs;k++) {
1999             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2000               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2001               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2002               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2003               j++;
2004             }
2005           }
2006 
2007           /* check change of basis */
2008           if (pcbddc->dbg_flag) {
2009             PetscInt   ii,jj;
2010             PetscBool valid_qr=PETSC_TRUE;
2011             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2012             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2013             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2014             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2015             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2016             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2017             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2018             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2019             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2020             for (jj=0;jj<size_of_constraint;jj++) {
2021               for (ii=0;ii<primal_dofs;ii++) {
2022                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2023                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2024               }
2025             }
2026             if (!valid_qr) {
2027               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2028               for (jj=0;jj<size_of_constraint;jj++) {
2029                 for (ii=0;ii<primal_dofs;ii++) {
2030                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2031                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2032                   }
2033                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2034                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2035                   }
2036                 }
2037               }
2038             } else {
2039               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2040             }
2041           }
2042         } else { /* simple transformation block */
2043           PetscInt row,col;
2044           PetscScalar val;
2045           for (j=0;j<size_of_constraint;j++) {
2046             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2047             if (!PetscBTLookup(is_primal,row)) {
2048               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2049               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2050               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2051             } else {
2052               for (k=0;k<size_of_constraint;k++) {
2053                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2054                 if (row != col) {
2055                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2056                 } else {
2057                   val = 1.0;
2058                 }
2059                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2060               }
2061             }
2062           }
2063         }
2064         /* increment primal counter */
2065         primal_counter += primal_dofs;
2066       } else {
2067         if (pcbddc->dbg_flag) {
2068           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2069         }
2070       }
2071       /* increment constraint counter total_counts */
2072       total_counts += primal_dofs;
2073     }
2074 
2075     /* free workspace */
2076     if (qr_needed) {
2077       if (pcbddc->dbg_flag) {
2078         ierr = PetscFree(work);CHKERRQ(ierr);
2079       }
2080       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2081       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2082       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2083       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2084       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2085     }
2086     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2087     /* assembling */
2088     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090     /*
2091     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2092     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2093     */
2094   }
2095 
2096   /* flush dbg viewer */
2097   if (pcbddc->dbg_flag) {
2098     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2099   }
2100 
2101   /* free workspace */
2102   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2103   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2104   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2105   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2106   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2107   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2108   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2109   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2110   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2111   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2117 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2118 {
2119   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2120   PC_IS       *pcis = (PC_IS*)pc->data;
2121   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2122   PetscInt    bs,ierr,i,vertex_size;
2123   PetscViewer viewer=pcbddc->dbg_viewer;
2124 
2125   PetscFunctionBegin;
2126   /* Reset previously computed graph */
2127   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2128   /* Init local Graph struct */
2129   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2130 
2131   /* Check validity of the csr graph passed in by the user */
2132   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2133     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2134   }
2135 
2136   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2137   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2138     Mat mat_adj;
2139     const PetscInt *xadj,*adjncy;
2140     PetscBool flg_row=PETSC_TRUE;
2141 
2142     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2143     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2144     if (!flg_row) {
2145       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2146     }
2147     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2148     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2149     if (!flg_row) {
2150       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2151     }
2152     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2153   }
2154 
2155   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2156   vertex_size = 1;
2157   if (!pcbddc->user_provided_isfordofs) {
2158     if (!pcbddc->n_ISForDofs) {
2159       IS *custom_ISForDofs;
2160 
2161       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2162       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2163       for (i=0;i<bs;i++) {
2164         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2165       }
2166       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2167       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2168       /* remove my references to IS objects */
2169       for (i=0;i<bs;i++) {
2170         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2171       }
2172       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2173     }
2174   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2175     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2176   }
2177 
2178   /* Setup of Graph */
2179   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2180 
2181   /* Graph's connected components analysis */
2182   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2183 
2184   /* print some info to stdout */
2185   if (pcbddc->dbg_flag) {
2186     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2187   }
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2193 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2194 {
2195   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2196   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   n = 0;
2201   vertices = 0;
2202   if (pcbddc->ConstraintMatrix) {
2203     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2204     for (i=0;i<local_primal_size;i++) {
2205       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2206       if (size_of_constraint == 1) n++;
2207       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2208     }
2209     if (vertices_idx) {
2210       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2211       n = 0;
2212       for (i=0;i<local_primal_size;i++) {
2213         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2214         if (size_of_constraint == 1) {
2215           vertices[n++]=row_cmat_indices[0];
2216         }
2217         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2218       }
2219     }
2220   }
2221   *n_vertices = n;
2222   if (vertices_idx) *vertices_idx = vertices;
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2228 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2229 {
2230   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2231   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2232   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2233   PetscBT        touched;
2234   PetscErrorCode ierr;
2235 
2236     /* This function assumes that the number of local constraints per connected component
2237        is not greater than the number of nodes defined for the connected component
2238        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2239   PetscFunctionBegin;
2240   n = 0;
2241   constraints_index = 0;
2242   if (pcbddc->ConstraintMatrix) {
2243     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2244     max_size_of_constraint = 0;
2245     for (i=0;i<local_primal_size;i++) {
2246       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2247       if (size_of_constraint > 1) {
2248         n++;
2249       }
2250       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2251       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2252     }
2253     if (constraints_idx) {
2254       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2255       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2256       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2257       n = 0;
2258       for (i=0;i<local_primal_size;i++) {
2259         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2260         if (size_of_constraint > 1) {
2261           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2262           /* find first untouched local node */
2263           j = 0;
2264           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2265           min_index = row_cmat_global_indices[j];
2266           min_loc = j;
2267           /* search the minimum among nodes not yet touched on the connected component
2268              since there can be more than one constraint on a single cc */
2269           for (j=1;j<size_of_constraint;j++) {
2270             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2271               min_index = row_cmat_global_indices[j];
2272               min_loc = j;
2273             }
2274           }
2275           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2276           constraints_index[n++] = row_cmat_indices[min_loc];
2277         }
2278         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2279       }
2280       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2281       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2282     }
2283   }
2284   *n_constraints = n;
2285   if (constraints_idx) *constraints_idx = constraints_index;
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 /* the next two functions has been adapted from pcis.c */
2290 #undef __FUNCT__
2291 #define __FUNCT__ "PCBDDCApplySchur"
2292 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2293 {
2294   PetscErrorCode ierr;
2295   PC_IS          *pcis = (PC_IS*)(pc->data);
2296 
2297   PetscFunctionBegin;
2298   if (!vec2_B) { vec2_B = v; }
2299   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2300   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2301   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2302   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2303   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2309 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2310 {
2311   PetscErrorCode ierr;
2312   PC_IS          *pcis = (PC_IS*)(pc->data);
2313 
2314   PetscFunctionBegin;
2315   if (!vec2_B) { vec2_B = v; }
2316   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2317   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2318   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2319   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2320   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "PCBDDCSubsetNumbering"
2326 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2327 {
2328   Vec            local_vec,global_vec;
2329   IS             seqis,paris;
2330   VecScatter     scatter_ctx;
2331   PetscScalar    *array;
2332   PetscInt       *temp_global_dofs;
2333   PetscScalar    globalsum;
2334   PetscInt       i,j,s;
2335   PetscInt       nlocals,first_index,old_index,max_local;
2336   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2337   PetscMPIInt    *dof_sizes,*dof_displs;
2338   PetscBool      first_found;
2339   PetscErrorCode ierr;
2340 
2341   PetscFunctionBegin;
2342   /* mpi buffers */
2343   MPI_Comm_size(comm,&size_prec_comm);
2344   MPI_Comm_rank(comm,&rank_prec_comm);
2345   j = ( !rank_prec_comm ? size_prec_comm : 0);
2346   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2347   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2348   /* get maximum size of subset */
2349   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2350   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2351   max_local = 0;
2352   if (n_local_dofs) {
2353     max_local = temp_global_dofs[0];
2354     for (i=1;i<n_local_dofs;i++) {
2355       if (max_local < temp_global_dofs[i] ) {
2356         max_local = temp_global_dofs[i];
2357       }
2358     }
2359   }
2360   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2361   max_global++;
2362   max_local = 0;
2363   if (n_local_dofs) {
2364     max_local = local_dofs[0];
2365     for (i=1;i<n_local_dofs;i++) {
2366       if (max_local < local_dofs[i] ) {
2367         max_local = local_dofs[i];
2368       }
2369     }
2370   }
2371   max_local++;
2372   /* allocate workspace */
2373   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2374   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2375   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2376   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2377   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2378   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2379   /* create scatter */
2380   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2381   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2382   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2383   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2384   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2385   /* init array */
2386   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2387   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2388   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2389   if (local_dofs_mult) {
2390     for (i=0;i<n_local_dofs;i++) {
2391       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2392     }
2393   } else {
2394     for (i=0;i<n_local_dofs;i++) {
2395       array[local_dofs[i]]=1.0;
2396     }
2397   }
2398   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2399   /* scatter into global vec and get total number of global dofs */
2400   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2401   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2402   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2403   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2404   /* Fill global_vec with cumulative function for global numbering */
2405   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2406   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2407   nlocals = 0;
2408   first_index = -1;
2409   first_found = PETSC_FALSE;
2410   for (i=0;i<s;i++) {
2411     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2412       first_found = PETSC_TRUE;
2413       first_index = i;
2414     }
2415     nlocals += (PetscInt)PetscRealPart(array[i]);
2416   }
2417   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2418   if (!rank_prec_comm) {
2419     dof_displs[0]=0;
2420     for (i=1;i<size_prec_comm;i++) {
2421       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2422     }
2423   }
2424   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2425   if (first_found) {
2426     array[first_index] += (PetscScalar)nlocals;
2427     old_index = first_index;
2428     for (i=first_index+1;i<s;i++) {
2429       if (PetscRealPart(array[i]) > 0.0) {
2430         array[i] += array[old_index];
2431         old_index = i;
2432       }
2433     }
2434   }
2435   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2436   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2437   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2438   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2439   /* get global ordering of local dofs */
2440   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2441   if (local_dofs_mult) {
2442     for (i=0;i<n_local_dofs;i++) {
2443       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2444     }
2445   } else {
2446     for (i=0;i<n_local_dofs;i++) {
2447       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2448     }
2449   }
2450   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2451   /* free workspace */
2452   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2453   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2454   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2455   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2456   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2457   /* return pointer to global ordering of local dofs */
2458   *global_numbering_subset = temp_global_dofs;
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2464 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2465 {
2466   PetscInt       i,j;
2467   PetscScalar    *alphas;
2468   PetscErrorCode ierr;
2469 
2470   PetscFunctionBegin;
2471   /* this implements stabilized Gram-Schmidt */
2472   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2473   for (i=0;i<n;i++) {
2474     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2475     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2476     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2477   }
2478   ierr = PetscFree(alphas);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatConvert_IS_AIJ"
2485 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2486 {
2487   Mat new_mat;
2488   Mat_IS *matis = (Mat_IS*)(mat->data);
2489   /* info on mat */
2490   PetscInt bs,rows,cols;
2491   PetscInt lrows,lcols;
2492   PetscInt local_rows,local_cols;
2493   PetscMPIInt nsubdomains;
2494   /* preallocation */
2495   Vec vec_dnz,vec_onz;
2496   PetscScalar *my_dnz,*my_onz,*array;
2497   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2498   PetscInt  index_row,index_col,owner;
2499   PetscInt  *local_indices,*global_indices;
2500   /* work */
2501   PetscInt i,j;
2502   PetscErrorCode ierr;
2503 
2504   PetscFunctionBegin;
2505   /* CHECKS */
2506   /* get info from mat */
2507   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2508   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2509   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2510   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2511 
2512   /* MAT_INITIAL_MATRIX */
2513   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2514   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2515   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2516   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2517   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2518   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2519 
2520   /*
2521     preallocation
2522   */
2523 
2524   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2525   /*
2526      Some vectors are needed to sum up properly on shared interface dofs.
2527      Note that preallocation is not exact, since it overestimates nonzeros
2528   */
2529 /*
2530   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2531   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2532   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2533   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2534 */
2535   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2536   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2537   /* All processes need to compute entire row ownership */
2538   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2539   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2540   for (i=0;i<nsubdomains;i++) {
2541     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2542       row_ownership[j]=i;
2543     }
2544   }
2545   /* map indices of local mat to global values */
2546   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2547   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2548   for (i=0;i<local_rows;i++) local_indices[i]=i;
2549   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2550   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2551 
2552   /*
2553      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2554      then, they will be summed up properly. This way, preallocation is always sufficient
2555   */
2556   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2557   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2558   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2559   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2560   for (i=0;i<local_rows;i++) {
2561     index_row = global_indices[i];
2562     for (j=i;j<local_rows;j++) {
2563       owner = row_ownership[index_row];
2564       index_col = global_indices[j];
2565       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2566         my_dnz[i] += 1.0;
2567       } else { /* offdiag block */
2568         my_onz[i] += 1.0;
2569       }
2570       /* same as before, interchanging rows and cols */
2571       if (i != j) {
2572         owner = row_ownership[index_col];
2573         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2574           my_dnz[j] += 1.0;
2575         } else {
2576           my_onz[j] += 1.0;
2577         }
2578       }
2579     }
2580   }
2581   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2582   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2583   if (local_rows) { /* multilevel guard */
2584     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2585     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2586   }
2587   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2588   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2589   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2590   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2591   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2592   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2593   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2594 
2595   /* set computed preallocation in dnz and onz */
2596   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2597   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2598   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2599   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2600   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2601   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2602   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2603   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2604 
2605   /* Resize preallocation if overestimated */
2606   for (i=0;i<lrows;i++) {
2607     dnz[i] = PetscMin(dnz[i],lcols);
2608     onz[i] = PetscMin(onz[i],cols-lcols);
2609   }
2610   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2611   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2612 
2613   /*
2614     Set values. Very Basic.
2615   */
2616   for (i=0;i<local_rows;i++) {
2617     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2618     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2619     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2620     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2621     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2622   }
2623   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2624   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2625   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2626 
2627   /* get back mat */
2628   *M = new_mat;
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 #undef __FUNCT__
2633 #define __FUNCT__ "MatISSubassemble_Private"
2634 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2635 {
2636   Mat             subdomain_adj;
2637   IS              new_ranks,ranks_send_to;
2638   MatPartitioning partitioner;
2639   Mat_IS          *matis;
2640   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2641   PetscInt        prank;
2642   PetscMPIInt     size,rank,color;
2643   PetscInt        *xadj,*adjncy,*oldranks;
2644   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2645   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2646   PetscErrorCode  ierr;
2647   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2648   PetscSubcomm    subcomm;
2649 
2650   PetscFunctionBegin;
2651   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2652   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2653   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2654 
2655   /* Get info on mapping */
2656   matis = (Mat_IS*)(mat->data);
2657   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2658   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2659 
2660   /* build local CSR graph of subdomains' connectivity */
2661   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2662   xadj[0] = 0;
2663   xadj[1] = PetscMax(n_neighs-1,0);
2664   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2665   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2666 
2667   if (threshold) {
2668     PetscInt* count,min_threshold;
2669     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2670     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2671     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2672       for (j=0;j<n_shared[i];j++) {
2673         count[shared[i][j]] += 1;
2674       }
2675     }
2676     /* adapt threshold since we dont want to lose significant connections */
2677     min_threshold = n_neighs;
2678     for (i=1;i<n_neighs;i++) {
2679       for (j=0;j<n_shared[i];j++) {
2680         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2681       }
2682     }
2683     threshold = PetscMax(min_threshold+1,threshold);
2684     xadj[1] = 0;
2685     for (i=1;i<n_neighs;i++) {
2686       for (j=0;j<n_shared[i];j++) {
2687         if (count[shared[i][j]] < threshold) {
2688           adjncy[xadj[1]] = neighs[i];
2689           adjncy_wgt[xadj[1]] = n_shared[i];
2690           xadj[1]++;
2691           break;
2692         }
2693       }
2694     }
2695     ierr = PetscFree(count);CHKERRQ(ierr);
2696   } else {
2697     if (xadj[1]) {
2698       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2699       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2700     }
2701   }
2702   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2703   if (use_square) {
2704     for (i=0;i<xadj[1];i++) {
2705       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2706     }
2707   }
2708   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2709 
2710   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2711 
2712   /*
2713     Restrict work on active processes only.
2714   */
2715   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2716   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2717   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2718   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2719   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2720   if (color) {
2721     ierr = PetscFree(xadj);CHKERRQ(ierr);
2722     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2723     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2724   } else {
2725     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2726     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2727     prank = rank;
2728     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2729     for (i=0;i<size;i++) {
2730       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2731     }
2732     for (i=0;i<xadj[1];i++) {
2733       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2734     }
2735     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2736     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2737     n_subdomains = (PetscInt)size;
2738     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2739 
2740     /* Partition */
2741     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2742     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2743     if (use_vwgt) {
2744       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2745       v_wgt[0] = local_size;
2746       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2747     }
2748     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2749     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2750     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2751     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2752     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2753 
2754     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2755     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2756     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2757     /* clean up */
2758     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2759     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2760     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2761     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2762   }
2763   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2764 
2765   /* assemble parallel IS for sends */
2766   i = 1;
2767   if (color) i=0;
2768   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2769   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2770 
2771   /* get back IS */
2772   *is_sends = ranks_send_to;
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatISSubassemble"
2780 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2781 {
2782   Mat                    local_mat,new_mat;
2783   Mat_IS                 *matis;
2784   IS                     is_sends_internal;
2785   PetscInt               rows,cols;
2786   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2787   PetscBool              ismatis,isdense;
2788   ISLocalToGlobalMapping l2gmap;
2789   PetscInt*              l2gmap_indices;
2790   const PetscInt*        is_indices;
2791   MatType                new_local_type;
2792   MatTypePrivate         new_local_type_private;
2793   /* buffers */
2794   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2795   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2796   /* MPI */
2797   MPI_Comm               comm;
2798   PetscMPIInt            n_sends,n_recvs,commsize;
2799   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2800   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2801   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2802   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2803   PetscErrorCode         ierr;
2804 
2805   PetscFunctionBegin;
2806   /* checks */
2807   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2808   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2809   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2810   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2811   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2812   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2813   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2814   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2815   PetscValidLogicalCollectiveInt(mat,bs,0);
2816   /* prepare IS for sending if not provided */
2817   if (!is_sends) {
2818     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2819     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2820   } else {
2821     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2822     is_sends_internal = is_sends;
2823   }
2824 
2825   /* get pointer of MATIS data */
2826   matis = (Mat_IS*)mat->data;
2827 
2828   /* get comm */
2829   comm = PetscObjectComm((PetscObject)mat);
2830 
2831   /* compute number of sends */
2832   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2833   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2834 
2835   /* compute number of receives */
2836   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2837   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2838   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2839   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2840   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2841   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2842   ierr = PetscFree(iflags);CHKERRQ(ierr);
2843 
2844   /* prepare send/receive buffers */
2845   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2846   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2847   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2848   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2849 
2850   /* Get data from local mat */
2851   if (!isdense) {
2852     /* TODO: See below some guidelines on how to prepare the local buffers */
2853     /*
2854        send_buffer_vals should contain the raw values of the local matrix
2855        send_buffer_idxs should contain:
2856        - MatType_PRIVATE type
2857        - PetscInt        size_of_l2gmap
2858        - PetscInt        global_row_indices[size_of_l2gmap]
2859        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2860     */
2861   } else {
2862     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2863     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2864     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2865     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2866     send_buffer_idxs[1] = i;
2867     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2868     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2869     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2870     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2871     for (i=0;i<n_sends;i++) {
2872       ilengths_vals[is_indices[i]] = len*len;
2873       ilengths_idxs[is_indices[i]] = len+2;
2874     }
2875   }
2876   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2877   buf_size_idxs = 0;
2878   buf_size_vals = 0;
2879   for (i=0;i<n_recvs;i++) {
2880     buf_size_idxs += (PetscInt)olengths_idxs[i];
2881     buf_size_vals += (PetscInt)olengths_vals[i];
2882   }
2883   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2884   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2885 
2886   /* get new tags for clean communications */
2887   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2888   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2889 
2890   /* allocate for requests */
2891   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2892   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2893   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2894   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2895 
2896   /* communications */
2897   ptr_idxs = recv_buffer_idxs;
2898   ptr_vals = recv_buffer_vals;
2899   for (i=0;i<n_recvs;i++) {
2900     source_dest = onodes[i];
2901     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2902     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2903     ptr_idxs += olengths_idxs[i];
2904     ptr_vals += olengths_vals[i];
2905   }
2906   for (i=0;i<n_sends;i++) {
2907     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2908     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2909     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2910   }
2911   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2912   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2913 
2914   /* assemble new l2g map */
2915   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2916   ptr_idxs = recv_buffer_idxs;
2917   buf_size_idxs = 0;
2918   for (i=0;i<n_recvs;i++) {
2919     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2920     ptr_idxs += olengths_idxs[i];
2921   }
2922   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2923   ptr_idxs = recv_buffer_idxs;
2924   buf_size_idxs = 0;
2925   for (i=0;i<n_recvs;i++) {
2926     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2927     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2928     ptr_idxs += olengths_idxs[i];
2929   }
2930   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2931   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2932   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2933 
2934   /* infer new local matrix type from received local matrices type */
2935   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2936   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2937   new_local_type_private = MATAIJ_PRIVATE;
2938   new_local_type = MATSEQAIJ;
2939   if (n_recvs) {
2940     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2941     ptr_idxs = recv_buffer_idxs;
2942     for (i=0;i<n_recvs;i++) {
2943       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2944         new_local_type_private = MATAIJ_PRIVATE;
2945         break;
2946       }
2947       ptr_idxs += olengths_idxs[i];
2948     }
2949     switch (new_local_type_private) {
2950       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2951         new_local_type = MATSEQAIJ;
2952         bs = 1;
2953         break;
2954       case MATAIJ_PRIVATE:
2955         new_local_type = MATSEQAIJ;
2956         bs = 1;
2957         break;
2958       case MATBAIJ_PRIVATE:
2959         new_local_type = MATSEQBAIJ;
2960         break;
2961       case MATSBAIJ_PRIVATE:
2962         new_local_type = MATSEQSBAIJ;
2963         break;
2964       default:
2965         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2966         break;
2967     }
2968   }
2969 
2970   /* create MATIS object */
2971   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2972   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2973   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2974   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2975   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2976   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2977 
2978   /* set values */
2979   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2980   ptr_vals = recv_buffer_vals;
2981   ptr_idxs = recv_buffer_idxs;
2982   for (i=0;i<n_recvs;i++) {
2983     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2984       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2985       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2986       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2987       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2988       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2989     }
2990     ptr_idxs += olengths_idxs[i];
2991     ptr_vals += olengths_vals[i];
2992   }
2993   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2996   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997 
2998   { /* check */
2999     Vec       lvec,rvec;
3000     PetscReal infty_error;
3001 
3002     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3003     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3004     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3005     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3006     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3007     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3008     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3009     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3010     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3011   }
3012 
3013   /* free workspace */
3014   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3015   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3016   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3017   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3018   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3019   if (isdense) {
3020     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3021     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3022   } else {
3023     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3024   }
3025   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3026   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3027   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3028   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3029   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3030   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3031   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3032   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3033   ierr = PetscFree(onodes);CHKERRQ(ierr);
3034   /* get back new mat */
3035   *mat_n = new_mat;
3036   PetscFunctionReturn(0);
3037 }
3038 
3039 #undef __FUNCT__
3040 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3041 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3042 {
3043   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3044   PC_IS                  *pcis = (PC_IS*)pc->data;
3045   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3046   MatNullSpace           CoarseNullSpace=NULL;
3047   ISLocalToGlobalMapping coarse_islg;
3048   IS                     coarse_is;
3049   PetscInt               max_it;
3050   PetscInt               im_active=-1,active_procs=-1;
3051   PC                     pc_temp;
3052   PCType                 coarse_pc_type;
3053   KSPType                coarse_ksp_type;
3054   PetscBool              multilevel_requested,multilevel_allowed;
3055   PetscBool              setsym,issym,isbddc,isnn;
3056   MatStructure           matstruct;
3057   PetscErrorCode         ierr;
3058 
3059   PetscFunctionBegin;
3060   /* Assign global numbering to coarse dofs */
3061   if (pcbddc->new_primal_space) { /* this is suboptimal -> we can chech when creating local primal indices */
3062     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3063     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3064     /* see if we can avoid some work */
3065     if (pcbddc->coarse_ksp) { /* already present */
3066       Mat tcoarse_mat;
3067       PetscInt ocoarse_size;
3068       ierr = KSPGetOperators(pcbddc->coarse_ksp,NULL,&tcoarse_mat,NULL);CHKERRQ(ierr);
3069       ierr = MatGetSize(tcoarse_mat,&ocoarse_size,NULL);CHKERRQ(ierr);
3070       if (ocoarse_size != pcbddc->coarse_size) { /* we need to destroy the KSP */
3071         ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
3072       }
3073     }
3074   }
3075 
3076   /* infer some info from user */
3077   issym = PETSC_FALSE;
3078   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3079   multilevel_allowed = PETSC_FALSE;
3080   multilevel_requested = PETSC_FALSE;
3081   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3082   if (multilevel_requested) {
3083     /* count "active processes" */
3084     im_active = !!(pcis->n);
3085     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3086     if (active_procs/pcbddc->coarsening_ratio < 2) {
3087       multilevel_allowed = PETSC_FALSE;
3088     } else {
3089       multilevel_allowed = PETSC_TRUE;
3090     }
3091   }
3092 
3093   /* set defaults for coarse KSP and PC */
3094   if (multilevel_allowed) {
3095     if (issym) {
3096       coarse_ksp_type = KSPRICHARDSON;
3097     } else {
3098       coarse_ksp_type = KSPCHEBYSHEV;
3099     }
3100     coarse_pc_type = PCBDDC;
3101   } else {
3102     coarse_ksp_type = KSPPREONLY;
3103     coarse_pc_type = PCREDUNDANT;
3104   }
3105 
3106   /* create the coarse KSP object only once with defaults */
3107   if (!pcbddc->coarse_ksp) {
3108     char prefix[256],str_level[3];
3109     size_t len;
3110     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3111     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3112     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3113     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3114     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3115     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3116     /* prefix */
3117     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3118     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3119     if (!pcbddc->current_level) {
3120       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3121       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3122     } else {
3123       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3124       if (pcbddc->current_level>1) len -= 2;
3125       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3126       *(prefix+len)='\0';
3127       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3128       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3129     }
3130     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3131   }
3132   /* allow user customization */
3133   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3134   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3135   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3136 
3137   /* get some info after set from options */
3138   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3139   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3140   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3141   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3142     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3143     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3144     isbddc = PETSC_FALSE;
3145   }
3146 
3147   /* propagate BDDC info to the next level */
3148   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3149   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3150   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3151 
3152   /* creates temporary MATIS object for coarse matrix */
3153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3154   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3155   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3156   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3157   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3158   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3161   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3162 
3163   /* assemble coarse matrix */
3164   if (isbddc || isnn) {
3165     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3166   } else { /* need to provide reuse */
3167     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3168   }
3169   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3170 
3171   /* create local to global scatters for coarse problem */
3172   if (pcbddc->new_primal_space) {
3173     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3174     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3175     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3176     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3177     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3178   }
3179   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3180 
3181   /* propagate symmetry info to coarse matrix */
3182   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3183 
3184   /* Compute coarse null space (special handling by BDDC only) */
3185   if (pcbddc->NullSpace) {
3186     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3187     if (isbddc) {
3188       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3189     } else {
3190       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3191     }
3192   }
3193 
3194   /* set operators */
3195   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3196   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3197 
3198   /* additional KSP customization */
3199   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3200   if (max_it < 5) {
3201     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3202   }
3203   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3204 
3205 
3206   /* print some info if requested */
3207   if (pcbddc->dbg_flag) {
3208     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3209     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3210     if (!multilevel_allowed) {
3211       if (multilevel_requested) {
3212         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3213       } else if (pcbddc->max_levels) {
3214         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3215       }
3216     }
3217     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3218     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3219   }
3220 
3221   /* setup coarse ksp */
3222   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3223   if (pcbddc->dbg_flag) {
3224     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3225     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3226     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3227     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3228   }
3229 
3230   /* Check coarse problem if requested */
3231   if (pcbddc->dbg_flag) {
3232     KSP       check_ksp;
3233     KSPType   check_ksp_type;
3234     PC        check_pc;
3235     Vec       check_vec;
3236     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3237     PetscInt  its;
3238     PetscBool ispreonly,compute;
3239 
3240     /* Create ksp object suitable for estimation of extreme eigenvalues */
3241     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3242     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3243     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3244     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3245     if (ispreonly) {
3246       check_ksp_type = KSPPREONLY;
3247       compute = PETSC_FALSE;
3248     } else {
3249       if (issym) check_ksp_type = KSPCG;
3250       else check_ksp_type = KSPGMRES;
3251       compute = PETSC_TRUE;
3252     }
3253     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3254     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3255     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3256     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3257     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3258     /* create random vec */
3259     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3260     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3261     if (CoarseNullSpace) {
3262       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3263     }
3264     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3265     /* solve coarse problem */
3266     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3267     if (CoarseNullSpace) {
3268       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3269     }
3270     /* check coarse problem residual error */
3271     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3272     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3273     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3274     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3275     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3276     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3277     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3278     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3279     /* get eigenvalue estimation if preonly has not been requested */
3280     if (!ispreonly) {
3281       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3282       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3283       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3284     }
3285     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3286     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3287   }
3288   /* free memory */
3289   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3290   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3291   PetscFunctionReturn(0);
3292 }
3293 
3294 #undef __FUNCT__
3295 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3296 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3297 {
3298   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3299   PC_IS*         pcis = (PC_IS*)pc->data;
3300   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3301   PetscInt       i,j,coarse_size;
3302   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3303   PetscErrorCode ierr;
3304 
3305   PetscFunctionBegin;
3306   /* get indices in local ordering for vertices and constraints */
3307   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3308   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3309   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3310   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3311   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3312   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3313   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3314 
3315   /* Compute global number of coarse dofs */
3316   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3317 
3318   /* check numbering */
3319   if (pcbddc->dbg_flag) {
3320     PetscScalar coarsesum,*array;
3321 
3322     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3323     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3324     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3325     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3326     for (i=0;i<pcbddc->local_primal_size;i++) {
3327       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3328     }
3329     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3330     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3331     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3332     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3333     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3334     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3335     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3336     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3337     for (i=0;i<pcis->n;i++) {
3338       if (array[i] == 1.0) {
3339         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3340       }
3341     }
3342     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3343     for (i=0;i<pcis->n;i++) {
3344       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3345     }
3346     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3347     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3348     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3349     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3350     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3351     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3352     if (pcbddc->dbg_flag > 1) {
3353       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3354       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3355       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3356       for (i=0;i<pcbddc->local_primal_size;i++) {
3357         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3358       }
3359       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3360     }
3361     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3362   }
3363   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3364   /* get back data */
3365   *coarse_size_n = coarse_size;
3366   *local_primal_indices_n = local_primal_indices;
3367   PetscFunctionReturn(0);
3368 }
3369 
3370