xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 39e2fb2acbe04a08c612a51de4e522023d995c41)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscInt       i;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
54   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
55   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   for (i=0;i<pcbddc->n_ISForDofs;i++) {
59     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
60   }
61   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
62   pcbddc->n_ISForDofs = 0;
63   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
64   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "PCBDDCResetTopography"
70 PetscErrorCode PCBDDCResetTopography(PC pc)
71 {
72   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
77   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
78   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "PCBDDCResetSolvers"
84 PetscErrorCode PCBDDCResetSolvers(PC pc)
85 {
86   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
87   PetscErrorCode ierr;
88 
89   PetscFunctionBegin;
90   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
91   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118 {
119   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
120   PC_IS          *pcis = (PC_IS*)pc->data;
121   VecType        impVecType;
122   PetscInt       n_vertices,n_constraints,local_primal_size,n_R,old_size;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   if (!pcbddc->ConstraintMatrix) {
127     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128   }
129   /* get sizes */
130   ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr);
131   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
132   n_constraints = local_primal_size - n_vertices;
133   n_R = pcis->n-n_vertices;
134   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
135   /* local work vectors (try to avoid unneeded work)*/
136   /* R nodes */
137   old_size = -1;
138   if (pcbddc->vec1_R) {
139     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
140   }
141   if (n_R != old_size) {
142     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
144     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
145     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
146     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
147     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
148   }
149   /* local primal dofs */
150   old_size = -1;
151   if (pcbddc->vec1_P) {
152     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
153   }
154   if (local_primal_size != old_size) {
155     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
157     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
158     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
159   }
160   /* local explicit constraints */
161   old_size = -1;
162   if (pcbddc->vec1_C) {
163     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
164   }
165   if (n_constraints && n_constraints != old_size) {
166     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
168     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
169     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "PCBDDCSetUpCorrection"
176 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
177 {
178   PetscErrorCode         ierr;
179   /* pointers to pcis and pcbddc */
180   PC_IS*                 pcis = (PC_IS*)pc->data;
181   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
182   /* submatrices of local problem */
183   Mat                    A_RV,A_VR,A_VV;
184   /* working matrices */
185   Mat                    M1,M2,M3,C_CR;
186   /* working vectors */
187   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
188   /* additional working stuff */
189   IS                     is_aux;
190   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
191   const PetscScalar      *array,*row_cmat_values;
192   const PetscInt         *row_cmat_indices,*idx_R_local;
193   PetscInt               *vertices,*idx_V_B,*auxindices;
194   PetscInt               n_vertices,n_constraints,size_of_constraint;
195   PetscInt               i,j,n_R,n_D,n_B;
196   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
197   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
198   MatType                impMatType;
199   /* some shortcuts to scalars */
200   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
201   /* for debugging purposes */
202   PetscReal              *coarsefunctions_errors,*constraints_errors;
203 
204   PetscFunctionBegin;
205   /* get number of vertices and their local indices */
206   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
207   n_constraints = pcbddc->local_primal_size-n_vertices;
208   /* Set Non-overlapping dimensions */
209   n_B = pcis->n_B; n_D = pcis->n - n_B;
210   n_R = pcis->n-n_vertices;
211 
212   /* Set types for local objects needed by BDDC precondtioner */
213   impMatType = MATSEQDENSE;
214 
215   /* Allocating some extra storage just to be safe */
216   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
217   for (i=0;i<pcis->n;i++) auxindices[i]=i;
218 
219   /* vertices in boundary numbering */
220   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
221   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
222   if (i != n_vertices) {
223     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
224   }
225 
226   /* Precompute stuffs needed for preprocessing and application of BDDC*/
227   if (n_constraints) {
228     /* see if we can save some allocations */
229     if (pcbddc->local_auxmat2) {
230       PetscInt on_R,on_constraints;
231       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
232       if (on_R != n_R || on_constraints != n_constraints) {
233         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
234         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
235       }
236     }
237     /* work vectors */
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
240     /* auxiliary matrices */
241     if (!pcbddc->local_auxmat2) {
242       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
243       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
244       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
245       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
246     }
247 
248     /* Extract constraints on R nodes: C_{CR}  */
249     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
250     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
251     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
252 
253     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
254     for (i=0;i<n_constraints;i++) {
255       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
256       /* Get row of constraint matrix in R numbering */
257       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
259       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
260       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
261       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
262       /* Solve for row of constraint matrix in R numbering */
263       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
264       /* Set values in local_auxmat2 */
265       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
267       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
268     }
269     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
272 
273     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
274     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
275     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
276     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
277     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
278     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
279     ierr = MatSetUp(M1);CHKERRQ(ierr);
280     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
281     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
282     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
283     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
284     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
285     ierr = MatDestroy(&M2);CHKERRQ(ierr);
286     ierr = MatDestroy(&M3);CHKERRQ(ierr);
287     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
288     if (!pcbddc->local_auxmat1) {
289       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     } else {
291       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
292     }
293   }
294 
295   /* Get submatrices from subdomain matrix */
296   if (n_vertices) {
297     PetscInt ibs,mbs;
298     PetscBool issbaij;
299     Mat newmat;
300 
301     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
302     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
303     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
304     if (ibs != mbs) { /* need to convert to SEQAIJ */
305       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
309       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
310     } else {
311       /* this is safe */
312       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
313       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
314       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
315         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
316         /* which of the two approaches is faster? */
317         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
319         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
320         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
321         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
322       } else {
323         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
324         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
325       }
326     }
327     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
328     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
329     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
330   }
331 
332   /* Matrix of coarse basis functions (local) */
333   if (pcbddc->coarse_phi_B) {
334     PetscInt on_B,on_primal;
335     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
336     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
337       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
338       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
339     }
340   }
341   if (pcbddc->coarse_phi_D) {
342     PetscInt on_D,on_primal;
343     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
344     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
345       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
346       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
347     }
348   }
349   if (!pcbddc->coarse_phi_B) {
350     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
351     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
352     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
353     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
354   }
355   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
356     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
357     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
358     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
359     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
360   }
361 
362   if (pcbddc->dbg_flag) {
363     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
364     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
365     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
366   }
367   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
368   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
371 
372   /* vertices */
373   for (i=0;i<n_vertices;i++) {
374     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
375     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
376     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
377     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
378     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
379     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
380     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
381     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
384     if (n_constraints) {
385       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
386       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
387       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
388     }
389     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
390     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
391 
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
395     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
399     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
400     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
401     if (pcbddc->switch_static || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
411     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
412     if (n_constraints) {
413       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
414       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
416     }
417 
418     /* check */
419     if (pcbddc->dbg_flag) {
420       /* assemble subdomain vector on local nodes */
421       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
422       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
427       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
428       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
429       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
430       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
432       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
433       if (n_constraints) {
434         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
435         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
437       }
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
441       /* check saddle point solution */
442       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
445       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
446       /* shift by the identity matrix */
447       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
448       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
451     }
452   }
453 
454   /* constraints */
455   for (i=0;i<n_constraints;i++) {
456     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
457     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
458     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
459     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
460     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
461     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
462     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
463     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
464     if (n_vertices) {
465       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
466     }
467     /* Set values in coarse basis function and subdomain part of coarse_mat */
468     /* coarse basis functions */
469     j = i+n_vertices; /* don't touch this! */
470     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
471     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
475     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
476     if (pcbddc->switch_static || pcbddc->dbg_flag) {
477       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
481       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
482     }
483     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
484     if (n_vertices) {
485       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
486       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
487       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
488     }
489     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
490     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
492 
493     if (pcbddc->dbg_flag) {
494       /* assemble subdomain vector on nodes */
495       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
496       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
498       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
499       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
500       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
501       /* assemble subdomain vector of lagrange multipliers */
502       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
503       if (n_vertices) {
504         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
505         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
506         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
507       }
508       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
510       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
511       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
513       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
514       /* check saddle point solution */
515       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
517       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
518       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
519       /* shift by the identity matrix */
520       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
521       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
523       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
524     }
525   }
526   /* call assembling routines for local coarse basis */
527   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   if (pcbddc->switch_static || pcbddc->dbg_flag) {
530     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532   }
533 
534   /* compute other basis functions for non-symmetric problems */
535   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
536   if (!setsym || (setsym && !issym)) {
537     if (!pcbddc->coarse_psi_B) {
538       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
539       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
540       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
541       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
542     }
543     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
544       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
545       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
546       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
547       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
548     }
549     for (i=0;i<pcbddc->local_primal_size;i++) {
550       if (n_constraints) {
551         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
552         for (j=0;j<n_constraints;j++) {
553           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
554         }
555         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
556         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
557       }
558       if (i<n_vertices) {
559         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
560         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
561         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
562         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
563         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
564         if (n_constraints) {
565           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
566         }
567       } else {
568         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
569       }
570       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
571       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
572       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
574       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
576       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
577       if (i<n_vertices) {
578         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
579       }
580       if (pcbddc->switch_static || pcbddc->dbg_flag) {
581         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
583         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
585         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
586       }
587 
588       if (pcbddc->dbg_flag) {
589         /* assemble subdomain vector on nodes */
590         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
591         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
593         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
594         if (i<n_vertices) {
595           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
596         }
597         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
598         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
599         /* assemble subdomain vector of lagrange multipliers */
600         for (j=0;j<pcbddc->local_primal_size;j++) {
601           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
602         }
603         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
604         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
605         /* check saddle point solution */
606         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
608         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
609         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
610         /* shift by the identity matrix */
611         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
612         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
614         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
615       }
616     }
617     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     if (pcbddc->switch_static || pcbddc->dbg_flag) {
620       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622     }
623   }
624   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
625   /* Checking coarse_sub_mat and coarse basis functios */
626   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
627   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
628   if (pcbddc->dbg_flag) {
629     Mat         coarse_sub_mat;
630     Mat         AUXMAT,TM1,TM2,TM3,TM4;
631     Mat         coarse_phi_D,coarse_phi_B;
632     Mat         coarse_psi_D,coarse_psi_B;
633     Mat         A_II,A_BB,A_IB,A_BI;
634     MatType     checkmattype=MATSEQAIJ;
635     PetscReal   real_value;
636 
637     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
638     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
639     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
640     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
641     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
642     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
643     if (pcbddc->coarse_psi_B) {
644       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
645       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
646     }
647     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
648 
649     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
650     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
651     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
652     if (pcbddc->coarse_psi_B) {
653       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
654       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
655       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
656       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
657       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
658       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
659       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
660       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
661       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
662       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
663       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
664       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
665     } else {
666       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
667       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
668       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
669       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
670       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
671       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
672       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
673       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
674     }
675     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
676     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
677     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
679     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
680     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
681     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
684     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
685     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
686     for (i=0;i<pcbddc->local_primal_size;i++) {
687       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
688     }
689     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
690     for (i=0;i<pcbddc->local_primal_size;i++) {
691       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
692     }
693     if (pcbddc->coarse_psi_B) {
694       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
695       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
696         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
697       }
698       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
699       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
700         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
701       }
702     }
703     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
706     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
707     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
710     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
711     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
712     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
713     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
714     if (pcbddc->coarse_psi_B) {
715       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
716       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
717     }
718     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
719     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
720     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
721     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
722   }
723   /* free memory */
724   if (n_vertices) {
725     ierr = PetscFree(vertices);CHKERRQ(ierr);
726     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
727     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
731   }
732   if (n_constraints) {
733     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
734     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
735     ierr = MatDestroy(&M1);CHKERRQ(ierr);
736     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
737   }
738   ierr = PetscFree(auxindices);CHKERRQ(ierr);
739   /* get back data */
740   *coarse_submat_vals_n = coarse_submat_vals;
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
746 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
747 {
748   PC_IS*            pcis = (PC_IS*)(pc->data);
749   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
750   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
751   PetscBool         issbaij,isbaij;
752   /* manage repeated solves */
753   MatReuse          reuse;
754   MatStructure      matstruct;
755   PetscErrorCode    ierr;
756 
757   PetscFunctionBegin;
758   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
759     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
760   }
761   /* get mat flags */
762   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
763   reuse = MAT_INITIAL_MATRIX;
764   if (pc->setupcalled) {
765     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
766     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
767     if (matstruct == SAME_NONZERO_PATTERN) {
768       reuse = MAT_REUSE_MATRIX;
769     } else {
770       reuse = MAT_INITIAL_MATRIX;
771     }
772   }
773   if (reuse == MAT_INITIAL_MATRIX) {
774     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
775     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
776     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
777     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
778     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
779   }
780 
781   /* transform local matrices if needed */
782   if (pcbddc->use_change_of_basis) {
783     Mat         change_mat_all;
784     PetscScalar *row_cmat_values;
785     PetscInt    *row_cmat_indices;
786     PetscInt    *nnz,*is_indices,*temp_indices;
787     PetscInt    i,j,k,n_D,n_B;
788 
789     /* Get Non-overlapping dimensions */
790     n_B = pcis->n_B;
791     n_D = pcis->n-n_B;
792 
793     /* compute nonzero structure of change of basis on all local nodes */
794     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
795     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
796     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
797     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
798     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
799     k=1;
800     for (i=0;i<n_B;i++) {
801       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
802       nnz[is_indices[i]]=j;
803       if (k < j) k = j;
804       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
805     }
806     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
807     /* assemble change of basis matrix on the whole set of local dofs */
808     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
809     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
810     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
811     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
812     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
813     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
814     for (i=0;i<n_D;i++) {
815       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
816     }
817     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
818     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
819     for (i=0;i<n_B;i++) {
820       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
821       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
822       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
823       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
824     }
825     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
826     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
827     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
828     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
829     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
830     if (!issbaij && !isbaij) {
831       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
832     } else {
833       Mat work_mat;
834       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
835       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
836       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
837     }
838     /*
839     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
840     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
841     */
842     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
843     ierr = PetscFree(nnz);CHKERRQ(ierr);
844     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
845   } else {
846     /* without change of basis, the local matrix is unchanged */
847     if (!pcbddc->local_mat) {
848       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
849       pcbddc->local_mat = matis->A;
850     }
851   }
852 
853   /* get submatrices */
854   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
855   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
856   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
857   if (!issbaij) {
858     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
859     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
860   } else {
861     Mat newmat;
862     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
863     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
864     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
865     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
872 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
873 {
874   PC_IS*         pcis = (PC_IS*)(pc->data);
875   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
876   IS             is_aux1,is_aux2;
877   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
878   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
879   PetscInt       vbs,bs;
880   PetscBT        bitmask;
881   PetscErrorCode ierr;
882 
883   PetscFunctionBegin;
884   /* No need to setup local scatters if primal space is unchanged */
885   if (!pcbddc->new_primal_space_local) {
886     PetscFunctionReturn(0);
887   }
888   /* destroy old objects */
889   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
890   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
891   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
892   /* Set Non-overlapping dimensions */
893   n_B = pcis->n_B; n_D = pcis->n - n_B;
894   /* get vertex indices from constraint matrix */
895   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
896 
897   /* create auxiliary bitmask */
898   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
899   for (i=0;i<n_vertices;i++) {
900     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
901   }
902 
903   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
904   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
905   for (i=0, n_R=0; i<pcis->n; i++) {
906     if (!PetscBTLookup(bitmask,i)) {
907       idx_R_local[n_R] = i;
908       n_R++;
909     }
910   }
911 
912   /* Block code */
913   vbs = 1;
914   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
915   if (bs>1 && !(n_vertices%bs)) {
916     PetscBool is_blocked = PETSC_TRUE;
917     PetscInt  *vary;
918     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
919     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
920     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
921     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
922     for (i=0; i<n_vertices; i++) {
923       if (vary[i]!=0 && vary[i]!=bs) {
924         is_blocked = PETSC_FALSE;
925         break;
926       }
927     }
928     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
929       vbs = bs;
930       for (i=0;i<n_R/vbs;i++) {
931         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
932       }
933     }
934     ierr = PetscFree(vary);CHKERRQ(ierr);
935   }
936   ierr = PetscFree(vertices);CHKERRQ(ierr);
937   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
938   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
939 
940   /* print some info if requested */
941   if (pcbddc->dbg_flag) {
942     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
944     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
947     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
948     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
949     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
950   }
951 
952   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
953   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
954   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
955   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
956   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
957   for (i=0; i<n_D; i++) {
958     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
959   }
960   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
961   for (i=0, j=0; i<n_R; i++) {
962     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
963       aux_array1[j++] = i;
964     }
965   }
966   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
967   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   for (i=0, j=0; i<n_B; i++) {
969     if (!PetscBTLookup(bitmask,is_indices[i])) {
970       aux_array2[j++] = i;
971     }
972   }
973   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
974   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
975   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
976   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
977   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
978 
979   if (pcbddc->switch_static || pcbddc->dbg_flag) {
980     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
981     for (i=0, j=0; i<n_R; i++) {
982       if (PetscBTLookup(bitmask,idx_R_local[i])) {
983         aux_array1[j++] = i;
984       }
985     }
986     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
987     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
988     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
989   }
990   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
991   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
998 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
999 {
1000   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1001   PC_IS          *pcis = (PC_IS*)pc->data;
1002   PC             pc_temp;
1003   Mat            A_RR;
1004   MatStructure   matstruct;
1005   MatReuse       reuse;
1006   PetscScalar    m_one = -1.0;
1007   PetscReal      value;
1008   PetscInt       n_D,n_R,ibs,mbs;
1009   PetscBool      use_exact,use_exact_reduced,issbaij;
1010   PetscErrorCode ierr;
1011   /* prefixes stuff */
1012   char           dir_prefix[256],neu_prefix[256],str_level[3];
1013   size_t         len;
1014 
1015   PetscFunctionBegin;
1016   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1017 
1018   /* compute prefixes */
1019   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1020   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1021   if (!pcbddc->current_level) {
1022     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1023     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1024     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1025     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1026   } else {
1027     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1028     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1029     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1030     len -= 15; /* remove "pc_bddc_coarse_" */
1031     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1032     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1033     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1034     *(dir_prefix+len)='\0';
1035     *(neu_prefix+len)='\0';
1036     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1037     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1038     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1039     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1040   }
1041 
1042   /* DIRICHLET PROBLEM */
1043   /* Matrix for Dirichlet problem is pcis->A_II */
1044   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1045   if (!pcbddc->ksp_D) { /* create object if not yet build */
1046     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1047     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1048     /* default */
1049     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1050     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1051     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1052     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1053     if (issbaij) {
1054       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1055     } else {
1056       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1057     }
1058     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1059   }
1060   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1061   /* Allow user's customization */
1062   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1063   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1064   if (!n_D) {
1065     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1066     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1067   }
1068   /* Set Up KSP for Dirichlet problem of BDDC */
1069   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1070   /* set ksp_D into pcis data */
1071   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1072   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1073   pcis->ksp_D = pcbddc->ksp_D;
1074 
1075   /* NEUMANN PROBLEM */
1076   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1077   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1078   if (pcbddc->ksp_R) { /* already created ksp */
1079     PetscInt nn_R;
1080     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1081     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1082     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1083     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1084       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1085       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1086       reuse = MAT_INITIAL_MATRIX;
1087     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1088       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1089         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1090         reuse = MAT_INITIAL_MATRIX;
1091       } else { /* safe to reuse the matrix */
1092         reuse = MAT_REUSE_MATRIX;
1093       }
1094     }
1095     /* last check */
1096     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1097       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1098       reuse = MAT_INITIAL_MATRIX;
1099     }
1100   } else { /* first time, so we need to create the matrix */
1101     reuse = MAT_INITIAL_MATRIX;
1102   }
1103   /* extract A_RR */
1104   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1105   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1106   if (ibs != mbs) {
1107     Mat newmat;
1108     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1109     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1110     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1111   } else {
1112     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1113   }
1114   if (!pcbddc->ksp_R) { /* create object if not present */
1115     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1116     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1117     /* default */
1118     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1119     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1120     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1121     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1122     if (issbaij) {
1123       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1124     } else {
1125       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1126     }
1127     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1128   }
1129   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1130   /* Allow user's customization */
1131   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1132   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1133   if (!n_R) {
1134     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1135     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1136   }
1137   /* Set Up KSP for Neumann problem of BDDC */
1138   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1139 
1140   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1141   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1142     /* Dirichlet */
1143     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1144     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1145     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1146     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1147     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1148     /* need to be adapted? */
1149     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1150     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1151     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1152     /* print info */
1153     if (pcbddc->dbg_flag) {
1154       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1155       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1156       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1157       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1158       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1159       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1160     }
1161     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1162       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1163     }
1164 
1165     /* Neumann */
1166     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1167     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1168     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1169     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1170     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1171     /* need to be adapted? */
1172     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1173     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1174     /* print info */
1175     if (pcbddc->dbg_flag) {
1176       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1177       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1178     }
1179     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1180       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1181     }
1182   }
1183   /* free Neumann problem's matrix */
1184   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1190 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1191 {
1192   PetscErrorCode ierr;
1193   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1194 
1195   PetscFunctionBegin;
1196   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1197   if (pcbddc->local_auxmat1) {
1198     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1199     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1200   }
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1206 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1207 {
1208   PetscErrorCode ierr;
1209   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1210   PC_IS*            pcis = (PC_IS*)  (pc->data);
1211   const PetscScalar zero = 0.0;
1212 
1213   PetscFunctionBegin;
1214   /* Application of PHI^T (or PSI^T)  */
1215   if (pcbddc->coarse_psi_B) {
1216     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1217     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1218   } else {
1219     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1220     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1221   }
1222   /* Scatter data of coarse_rhs */
1223   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1224   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225 
1226   /* Local solution on R nodes */
1227   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1228   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1229   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   if (pcbddc->switch_static) {
1231     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1232     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   }
1234   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1235   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1236   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238   if (pcbddc->switch_static) {
1239     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1241   }
1242 
1243   /* Coarse solution */
1244   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1246     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1247   }
1248   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1249   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1250 
1251   /* Sum contributions from two levels */
1252   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1253   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1259 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1260 {
1261   PetscErrorCode ierr;
1262   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1263 
1264   PetscFunctionBegin;
1265   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1271 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1272 {
1273   PetscErrorCode ierr;
1274   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1275 
1276   PetscFunctionBegin;
1277   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /* uncomment for testing purposes */
1282 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1283 #undef __FUNCT__
1284 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1285 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1286 {
1287   PetscErrorCode    ierr;
1288   PC_IS*            pcis = (PC_IS*)(pc->data);
1289   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1290   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1291   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1292   MatType           impMatType=MATSEQAIJ;
1293   /* one and zero */
1294   PetscScalar       one=1.0,zero=0.0;
1295   /* space to store constraints and their local indices */
1296   PetscScalar       *temp_quadrature_constraint;
1297   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1298   /* iterators */
1299   PetscInt          i,j,k,total_counts,temp_start_ptr;
1300   /* stuff to store connected components stored in pcbddc->mat_graph */
1301   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1302   PetscInt          n_ISForFaces,n_ISForEdges;
1303   /* near null space stuff */
1304   MatNullSpace      nearnullsp;
1305   const Vec         *nearnullvecs;
1306   Vec               *localnearnullsp;
1307   PetscBool         nnsp_has_cnst;
1308   PetscInt          nnsp_size;
1309   PetscScalar       *array;
1310   /* BLAS integers */
1311   PetscBLASInt      lwork,lierr;
1312   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1313   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1314   /* LAPACK working arrays for SVD or POD */
1315   PetscBool         skip_lapack;
1316   PetscScalar       *work;
1317   PetscReal         *singular_vals;
1318 #if defined(PETSC_USE_COMPLEX)
1319   PetscReal         *rwork;
1320 #endif
1321 #if defined(PETSC_MISSING_LAPACK_GESVD)
1322   PetscBLASInt      Blas_one_2=1;
1323   PetscScalar       *temp_basis,*correlation_mat;
1324 #else
1325   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1326   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1327 #endif
1328   /* reuse */
1329   PetscInt          olocal_primal_size;
1330   PetscInt          *oprimal_indices_local_idxs;
1331   /* change of basis */
1332   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1333   PetscBool         boolforchange,qr_needed;
1334   PetscBT           touched,change_basis,qr_needed_idx;
1335   /* auxiliary stuff */
1336   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1337   /* some quantities */
1338   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1339   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1340 
1341 
1342   PetscFunctionBegin;
1343   /* Destroy Mat objects computed previously */
1344   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1345   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1346   /* Get index sets for faces, edges and vertices from graph */
1347   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1348     pcbddc->use_vertices = PETSC_TRUE;
1349   }
1350   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1351   /* HACK: provide functions to set change of basis */
1352   if (!ISForVertices && pcbddc->NullSpace) {
1353     pcbddc->use_change_of_basis = PETSC_TRUE;
1354     pcbddc->use_change_on_faces = PETSC_FALSE;
1355   }
1356   /* print some info */
1357   if (pcbddc->dbg_flag) {
1358     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1359     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1360     i = 0;
1361     if (ISForVertices) {
1362       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1365     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1366     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1367     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1368   }
1369   /* check if near null space is attached to global mat */
1370   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1371   if (nearnullsp) {
1372     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1373     /* remove any stored info */
1374     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1375     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1376     /* store information for BDDC solver reuse */
1377     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1378     pcbddc->onearnullspace = nearnullsp;
1379     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1380     for (i=0;i<nnsp_size;i++) {
1381       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1382     }
1383   } else { /* if near null space is not provided BDDC uses constants by default */
1384     nnsp_size = 0;
1385     nnsp_has_cnst = PETSC_TRUE;
1386   }
1387   /* get max number of constraints on a single cc */
1388   max_constraints = nnsp_size;
1389   if (nnsp_has_cnst) max_constraints++;
1390 
1391   /*
1392        Evaluate maximum storage size needed by the procedure
1393        - temp_indices will contain start index of each constraint stored as follows
1394        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1395        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1396        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1397                                                                                                                                                          */
1398   total_counts = n_ISForFaces+n_ISForEdges;
1399   total_counts *= max_constraints;
1400   n_vertices = 0;
1401   if (ISForVertices) {
1402     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1403   }
1404   total_counts += n_vertices;
1405   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1406   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1407   total_counts = 0;
1408   max_size_of_constraint = 0;
1409   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1410     if (i<n_ISForEdges) {
1411       used_IS = &ISForEdges[i];
1412     } else {
1413       used_IS = &ISForFaces[i-n_ISForEdges];
1414     }
1415     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1416     total_counts += j;
1417     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1418   }
1419   total_counts *= max_constraints;
1420   total_counts += n_vertices;
1421   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1422   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1423   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1424   /* get local part of global near null space vectors */
1425   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1426   for (k=0;k<nnsp_size;k++) {
1427     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1428     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1429     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1430   }
1431 
1432   /* whether or not to skip lapack calls */
1433   skip_lapack = PETSC_TRUE;
1434   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1435 
1436   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1437   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1438     PetscScalar temp_work;
1439 #if defined(PETSC_MISSING_LAPACK_GESVD)
1440     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1441     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1442     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1443     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1444 #if defined(PETSC_USE_COMPLEX)
1445     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1446 #endif
1447     /* now we evaluate the optimal workspace using query with lwork=-1 */
1448     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1449     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1450     lwork = -1;
1451     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1452 #if !defined(PETSC_USE_COMPLEX)
1453     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1454 #else
1455     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1456 #endif
1457     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1458     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1459 #else /* on missing GESVD */
1460     /* SVD */
1461     PetscInt max_n,min_n;
1462     max_n = max_size_of_constraint;
1463     min_n = max_constraints;
1464     if (max_size_of_constraint < max_constraints) {
1465       min_n = max_size_of_constraint;
1466       max_n = max_constraints;
1467     }
1468     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1469 #if defined(PETSC_USE_COMPLEX)
1470     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1471 #endif
1472     /* now we evaluate the optimal workspace using query with lwork=-1 */
1473     lwork = -1;
1474     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1475     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1476     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1477     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1478 #if !defined(PETSC_USE_COMPLEX)
1479     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1480 #else
1481     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1482 #endif
1483     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1484     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1485 #endif /* on missing GESVD */
1486     /* Allocate optimal workspace */
1487     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1488     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1489   }
1490   /* Now we can loop on constraining sets */
1491   total_counts = 0;
1492   temp_indices[0] = 0;
1493   /* vertices */
1494   if (ISForVertices) {
1495     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1496     if (nnsp_has_cnst) { /* consider all vertices */
1497       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1498       for (i=0;i<n_vertices;i++) {
1499         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1500         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1501         total_counts++;
1502       }
1503     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1504       PetscBool used_vertex;
1505       for (i=0;i<n_vertices;i++) {
1506         used_vertex = PETSC_FALSE;
1507         k = 0;
1508         while (!used_vertex && k<nnsp_size) {
1509           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1510           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1511             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1512             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1513             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1514             total_counts++;
1515             used_vertex = PETSC_TRUE;
1516           }
1517           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1518           k++;
1519         }
1520       }
1521     }
1522     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1523     n_vertices = total_counts;
1524   }
1525 
1526   /* edges and faces */
1527   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1528     if (i<n_ISForEdges) {
1529       used_IS = &ISForEdges[i];
1530       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1531     } else {
1532       used_IS = &ISForFaces[i-n_ISForEdges];
1533       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1534     }
1535     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1536     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1537     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1538     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1539     /* change of basis should not be performed on local periodic nodes */
1540     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1541     if (nnsp_has_cnst) {
1542       PetscScalar quad_value;
1543       temp_constraints++;
1544       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1545       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1546       for (j=0;j<size_of_constraint;j++) {
1547         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1548       }
1549       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1550       total_counts++;
1551     }
1552     for (k=0;k<nnsp_size;k++) {
1553       PetscReal real_value;
1554       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1555       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1556       for (j=0;j<size_of_constraint;j++) {
1557         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1558       }
1559       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1560       /* check if array is null on the connected component */
1561       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1562       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1563       if (real_value > 0.0) { /* keep indices and values */
1564         temp_constraints++;
1565         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1566         total_counts++;
1567       }
1568     }
1569     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1570     valid_constraints = temp_constraints;
1571     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1572     if (!pcbddc->use_nnsp_true && temp_constraints) {
1573       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1574 
1575 #if defined(PETSC_MISSING_LAPACK_GESVD)
1576       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1577          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1578          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1579             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1580             from that computed using LAPACKgesvd
1581          -> This is due to a different computation of eigenvectors in LAPACKheev
1582          -> The quality of the POD-computed basis will be the same */
1583       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1584       /* Store upper triangular part of correlation matrix */
1585       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1586       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1587       for (j=0;j<temp_constraints;j++) {
1588         for (k=0;k<j+1;k++) {
1589           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1590         }
1591       }
1592       /* compute eigenvalues and eigenvectors of correlation matrix */
1593       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1594       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1595 #if !defined(PETSC_USE_COMPLEX)
1596       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1597 #else
1598       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1599 #endif
1600       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1601       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1602       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1603       j=0;
1604       while (j < temp_constraints && singular_vals[j] < tol) j++;
1605       total_counts=total_counts-j;
1606       valid_constraints = temp_constraints-j;
1607       /* scale and copy POD basis into used quadrature memory */
1608       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1609       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1610       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1611       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1612       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1613       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1614       if (j<temp_constraints) {
1615         PetscInt ii;
1616         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1617         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1618         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1619         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1620         for (k=0;k<temp_constraints-j;k++) {
1621           for (ii=0;ii<size_of_constraint;ii++) {
1622             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1623           }
1624         }
1625       }
1626 #else  /* on missing GESVD */
1627       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1628       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1629       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1630       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1631 #if !defined(PETSC_USE_COMPLEX)
1632       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1633 #else
1634       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1635 #endif
1636       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1637       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1638       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1639       k = temp_constraints;
1640       if (k > size_of_constraint) k = size_of_constraint;
1641       j = 0;
1642       while (j < k && singular_vals[k-j-1] < tol) j++;
1643       total_counts = total_counts-temp_constraints+k-j;
1644       valid_constraints = k-j;
1645 #endif /* on missing GESVD */
1646     }
1647     /* setting change_of_basis flag is safe now */
1648     if (boolforchange) {
1649       for (j=0;j<valid_constraints;j++) {
1650         PetscBTSet(change_basis,total_counts-j-1);
1651       }
1652     }
1653   }
1654   /* free index sets of faces, edges and vertices */
1655   for (i=0;i<n_ISForFaces;i++) {
1656     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1657   }
1658   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1659   for (i=0;i<n_ISForEdges;i++) {
1660     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1661   }
1662   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1663   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1664   /* map temp_indices_to_constraint in boundary numbering */
1665   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1666   if (i != temp_indices[total_counts]) {
1667     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1668   }
1669 
1670   /* free workspace */
1671   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1672     ierr = PetscFree(work);CHKERRQ(ierr);
1673 #if defined(PETSC_USE_COMPLEX)
1674     ierr = PetscFree(rwork);CHKERRQ(ierr);
1675 #endif
1676     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1677 #if defined(PETSC_MISSING_LAPACK_GESVD)
1678     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1679     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1680 #endif
1681   }
1682   for (k=0;k<nnsp_size;k++) {
1683     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1684   }
1685   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1686 
1687   /* set quantities in pcbddc data structure and store previous primal size */
1688   /* n_vertices defines the number of subdomain corners in the primal space */
1689   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1690   olocal_primal_size = pcbddc->local_primal_size;
1691   pcbddc->local_primal_size = total_counts;
1692   pcbddc->n_vertices = n_vertices;
1693   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1694 
1695   /* Create constraint matrix */
1696   /* The constraint matrix is used to compute the l2g map of primal dofs */
1697   /* so we need to set it up properly either with or without change of basis */
1698   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1699   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1700   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1701   /* array to compute a local numbering of constraints : vertices first then constraints */
1702   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1703   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1704   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1705   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1706   /* auxiliary stuff for basis change */
1707   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1708   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1709 
1710   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1711   total_primal_vertices=0;
1712   for (i=0;i<pcbddc->local_primal_size;i++) {
1713     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1714     if (size_of_constraint == 1) {
1715       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1716       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1717       aux_primal_minloc[total_primal_vertices]=0;
1718       total_primal_vertices++;
1719     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1720       PetscInt min_loc,min_index;
1721       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1722       /* find first untouched local node */
1723       k = 0;
1724       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1725       min_index = global_indices[k];
1726       min_loc = k;
1727       /* search the minimum among global nodes already untouched on the cc */
1728       for (k=1;k<size_of_constraint;k++) {
1729         /* there can be more than one constraint on a single connected component */
1730         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1731           min_index = global_indices[k];
1732           min_loc = k;
1733         }
1734       }
1735       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1736       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1737       aux_primal_minloc[total_primal_vertices]=min_loc;
1738       total_primal_vertices++;
1739     }
1740   }
1741   /* determine if a QR strategy is needed for change of basis */
1742   qr_needed = PETSC_FALSE;
1743   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1744   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1745     if (PetscBTLookup(change_basis,i)) {
1746       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1747       j = 0;
1748       for (k=0;k<size_of_constraint;k++) {
1749         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1750           j++;
1751         }
1752       }
1753       /* found more than one primal dof on the cc */
1754       if (j > 1) {
1755         PetscBTSet(qr_needed_idx,i);
1756         qr_needed = PETSC_TRUE;
1757       }
1758     }
1759   }
1760   /* free workspace */
1761   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1762 
1763   /* permute indices in order to have a sorted set of vertices */
1764   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1765 
1766   /* nonzero structure of constraint matrix */
1767   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1768   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1769   j=total_primal_vertices;
1770   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1771     if (!PetscBTLookup(change_basis,i)) {
1772       nnz[j]=temp_indices[i+1]-temp_indices[i];
1773       j++;
1774     }
1775   }
1776   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1777   ierr = PetscFree(nnz);CHKERRQ(ierr);
1778   /* set values in constraint matrix */
1779   for (i=0;i<total_primal_vertices;i++) {
1780     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1781   }
1782   total_counts = total_primal_vertices;
1783   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1784     if (!PetscBTLookup(change_basis,i)) {
1785       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1786       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1787       total_counts++;
1788     }
1789   }
1790   /* assembling */
1791   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1792   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1793   /*
1794   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1795   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1796   */
1797   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1798   if (pcbddc->use_change_of_basis) {
1799     /* dual and primal dofs on a single cc */
1800     PetscInt     dual_dofs,primal_dofs;
1801     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1802     PetscInt     primal_counter;
1803     /* working stuff for GEQRF */
1804     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1805     PetscBLASInt lqr_work;
1806     /* working stuff for UNGQR */
1807     PetscScalar  *gqr_work,lgqr_work_t;
1808     PetscBLASInt lgqr_work;
1809     /* working stuff for TRTRS */
1810     PetscScalar  *trs_rhs;
1811     PetscBLASInt Blas_NRHS;
1812     /* pointers for values insertion into change of basis matrix */
1813     PetscInt     *start_rows,*start_cols;
1814     PetscScalar  *start_vals;
1815     /* working stuff for values insertion */
1816     PetscBT      is_primal;
1817 
1818     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1819     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1820     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1821     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1822     /* work arrays */
1823     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1824     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1825     /* nonzeros per row */
1826     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1827       if (PetscBTLookup(change_basis,i)) {
1828         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1829         if (PetscBTLookup(qr_needed_idx,i)) {
1830           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1831         } else {
1832           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1833           /* get local primal index on the cc */
1834           j = 0;
1835           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1836           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1837         }
1838       }
1839     }
1840     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1841     ierr = PetscFree(nnz);CHKERRQ(ierr);
1842     /* Set initial identity in the matrix */
1843     for (i=0;i<pcis->n_B;i++) {
1844       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1845     }
1846 
1847     if (pcbddc->dbg_flag) {
1848       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1849       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1850     }
1851 
1852 
1853     /* Now we loop on the constraints which need a change of basis */
1854     /*
1855        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1856        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1857 
1858        Basic blocks of change of basis matrix T computed
1859 
1860           - Using the following block transformation if there is only a primal dof on the cc
1861             (in the example, primal dof is the last one of the edge in LOCAL ordering
1862              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1863             | 1        0   ...        0     1 |
1864             | 0        1   ...        0     1 |
1865             |              ...                |
1866             | 0        ...            1     1 |
1867             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1868 
1869           - via QR decomposition of constraints otherwise
1870     */
1871     if (qr_needed) {
1872       /* space to store Q */
1873       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1874       /* first we issue queries for optimal work */
1875       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1876       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1877       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1878       lqr_work = -1;
1879       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1880       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1881       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1882       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1883       lgqr_work = -1;
1884       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1885       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1886       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1887       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1888       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1889       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1890       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1891       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1892       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1893       /* array to store scaling factors for reflectors */
1894       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1895       /* array to store rhs and solution of triangular solver */
1896       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1897       /* allocating workspace for check */
1898       if (pcbddc->dbg_flag) {
1899         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1900       }
1901     }
1902     /* array to store whether a node is primal or not */
1903     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1904     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1905     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1906     if (i != total_primal_vertices) {
1907       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1908     }
1909     for (i=0;i<total_primal_vertices;i++) {
1910       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1911     }
1912     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1913 
1914     /* loop on constraints and see whether or not they need a change of basis and compute it */
1915     /* -> using implicit ordering contained in temp_indices data */
1916     total_counts = pcbddc->n_vertices;
1917     primal_counter = total_counts;
1918     while (total_counts<pcbddc->local_primal_size) {
1919       primal_dofs = 1;
1920       if (PetscBTLookup(change_basis,total_counts)) {
1921         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1922         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1923           primal_dofs++;
1924         }
1925         /* get constraint info */
1926         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1927         dual_dofs = size_of_constraint-primal_dofs;
1928 
1929         if (pcbddc->dbg_flag) {
1930           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1931         }
1932 
1933         if (primal_dofs > 1) { /* QR */
1934 
1935           /* copy quadrature constraints for change of basis check */
1936           if (pcbddc->dbg_flag) {
1937             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1938           }
1939           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1940           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1941 
1942           /* compute QR decomposition of constraints */
1943           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1944           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1945           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1946           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1947           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1948           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1949           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1950 
1951           /* explictly compute R^-T */
1952           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1953           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1954           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1955           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1956           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1957           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1958           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1959           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1960           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1961           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1962 
1963           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1964           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1965           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1966           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1967           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1968           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1969           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1970           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1971           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1972 
1973           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1974              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1975              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1976           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1977           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1978           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1979           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1980           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1981           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1982           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1983           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1984           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1985           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1986 
1987           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1988           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1989           /* insert cols for primal dofs */
1990           for (j=0;j<primal_dofs;j++) {
1991             start_vals = &qr_basis[j*size_of_constraint];
1992             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1993             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1994           }
1995           /* insert cols for dual dofs */
1996           for (j=0,k=0;j<dual_dofs;k++) {
1997             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1998               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1999               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2000               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2001               j++;
2002             }
2003           }
2004 
2005           /* check change of basis */
2006           if (pcbddc->dbg_flag) {
2007             PetscInt   ii,jj;
2008             PetscBool valid_qr=PETSC_TRUE;
2009             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2010             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2011             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2012             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2013             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2014             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2015             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2016             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2017             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2018             for (jj=0;jj<size_of_constraint;jj++) {
2019               for (ii=0;ii<primal_dofs;ii++) {
2020                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2021                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2022               }
2023             }
2024             if (!valid_qr) {
2025               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2026               for (jj=0;jj<size_of_constraint;jj++) {
2027                 for (ii=0;ii<primal_dofs;ii++) {
2028                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2029                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2030                   }
2031                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2032                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2033                   }
2034                 }
2035               }
2036             } else {
2037               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2038             }
2039           }
2040         } else { /* simple transformation block */
2041           PetscInt row,col;
2042           PetscScalar val;
2043           for (j=0;j<size_of_constraint;j++) {
2044             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2045             if (!PetscBTLookup(is_primal,row)) {
2046               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2047               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2048               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2049             } else {
2050               for (k=0;k<size_of_constraint;k++) {
2051                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2052                 if (row != col) {
2053                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2054                 } else {
2055                   val = 1.0;
2056                 }
2057                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2058               }
2059             }
2060           }
2061         }
2062         /* increment primal counter */
2063         primal_counter += primal_dofs;
2064       } else {
2065         if (pcbddc->dbg_flag) {
2066           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2067         }
2068       }
2069       /* increment constraint counter total_counts */
2070       total_counts += primal_dofs;
2071     }
2072 
2073     /* free workspace */
2074     if (qr_needed) {
2075       if (pcbddc->dbg_flag) {
2076         ierr = PetscFree(work);CHKERRQ(ierr);
2077       }
2078       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2079       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2080       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2081       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2082       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2083     }
2084     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2085     /* assembling */
2086     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088     /*
2089     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2090     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2091     */
2092   }
2093 
2094   /* get indices in local ordering for vertices and constraints */
2095   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2096     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2097     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2098   }
2099   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2100   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2101   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2102   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2103   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2104   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2105   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2106   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2107   /* check if a new primal space has been introduced */
2108   pcbddc->new_primal_space_local = PETSC_TRUE;
2109   if (olocal_primal_size == pcbddc->local_primal_size) {
2110     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2111     pcbddc->new_primal_space_local = !pcbddc->new_primal_space_local;
2112     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2113   }
2114   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2115   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2116 
2117   /* flush dbg viewer */
2118   if (pcbddc->dbg_flag) {
2119     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2120   }
2121 
2122   /* free workspace */
2123   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2124   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2125   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2126   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2127   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2128   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2129   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2130   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2136 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2137 {
2138   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2139   PC_IS       *pcis = (PC_IS*)pc->data;
2140   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2141   PetscInt    bs,ierr,i,vertex_size;
2142   PetscViewer viewer=pcbddc->dbg_viewer;
2143 
2144   PetscFunctionBegin;
2145   /* Reset previously computed graph */
2146   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2147   /* Init local Graph struct */
2148   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2149 
2150   /* Check validity of the csr graph passed in by the user */
2151   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2152     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2153   }
2154 
2155   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2156   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2157     Mat mat_adj;
2158     const PetscInt *xadj,*adjncy;
2159     PetscBool flg_row=PETSC_TRUE;
2160 
2161     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2162     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2163     if (!flg_row) {
2164       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2165     }
2166     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2167     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2168     if (!flg_row) {
2169       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2170     }
2171     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2172   }
2173 
2174   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2175   vertex_size = 1;
2176   if (!pcbddc->user_provided_isfordofs) {
2177     if (!pcbddc->n_ISForDofs) {
2178       IS *custom_ISForDofs;
2179 
2180       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2181       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2182       for (i=0;i<bs;i++) {
2183         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2184       }
2185       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2186       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2187       /* remove my references to IS objects */
2188       for (i=0;i<bs;i++) {
2189         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2190       }
2191       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2192     }
2193   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2194     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2195   }
2196 
2197   /* Setup of Graph */
2198   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2199 
2200   /* Graph's connected components analysis */
2201   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2202 
2203   /* print some info to stdout */
2204   if (pcbddc->dbg_flag) {
2205     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2206   }
2207 
2208   /* mark topography has done */
2209   pcbddc->recompute_topography = PETSC_FALSE;
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2215 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2216 {
2217   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2218   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   n = 0;
2223   vertices = 0;
2224   if (pcbddc->ConstraintMatrix) {
2225     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2226     for (i=0;i<local_primal_size;i++) {
2227       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2228       if (size_of_constraint == 1) n++;
2229       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2230     }
2231     if (vertices_idx) {
2232       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2233       n = 0;
2234       for (i=0;i<local_primal_size;i++) {
2235         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2236         if (size_of_constraint == 1) {
2237           vertices[n++]=row_cmat_indices[0];
2238         }
2239         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2240       }
2241     }
2242   }
2243   *n_vertices = n;
2244   if (vertices_idx) *vertices_idx = vertices;
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2250 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2251 {
2252   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2253   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2254   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2255   PetscBT        touched;
2256   PetscErrorCode ierr;
2257 
2258     /* This function assumes that the number of local constraints per connected component
2259        is not greater than the number of nodes defined for the connected component
2260        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2261   PetscFunctionBegin;
2262   n = 0;
2263   constraints_index = 0;
2264   if (pcbddc->ConstraintMatrix) {
2265     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2266     max_size_of_constraint = 0;
2267     for (i=0;i<local_primal_size;i++) {
2268       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2269       if (size_of_constraint > 1) {
2270         n++;
2271       }
2272       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2273       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2274     }
2275     if (constraints_idx) {
2276       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2277       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2278       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2279       n = 0;
2280       for (i=0;i<local_primal_size;i++) {
2281         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2282         if (size_of_constraint > 1) {
2283           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2284           /* find first untouched local node */
2285           j = 0;
2286           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2287           min_index = row_cmat_global_indices[j];
2288           min_loc = j;
2289           /* search the minimum among nodes not yet touched on the connected component
2290              since there can be more than one constraint on a single cc */
2291           for (j=1;j<size_of_constraint;j++) {
2292             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2293               min_index = row_cmat_global_indices[j];
2294               min_loc = j;
2295             }
2296           }
2297           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2298           constraints_index[n++] = row_cmat_indices[min_loc];
2299         }
2300         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2301       }
2302       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2303       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2304     }
2305   }
2306   *n_constraints = n;
2307   if (constraints_idx) *constraints_idx = constraints_index;
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 /* the next two functions has been adapted from pcis.c */
2312 #undef __FUNCT__
2313 #define __FUNCT__ "PCBDDCApplySchur"
2314 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2315 {
2316   PetscErrorCode ierr;
2317   PC_IS          *pcis = (PC_IS*)(pc->data);
2318 
2319   PetscFunctionBegin;
2320   if (!vec2_B) { vec2_B = v; }
2321   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2322   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2323   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2324   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2325   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2331 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2332 {
2333   PetscErrorCode ierr;
2334   PC_IS          *pcis = (PC_IS*)(pc->data);
2335 
2336   PetscFunctionBegin;
2337   if (!vec2_B) { vec2_B = v; }
2338   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2339   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2340   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2341   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2342   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "PCBDDCSubsetNumbering"
2348 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2349 {
2350   Vec            local_vec,global_vec;
2351   IS             seqis,paris;
2352   VecScatter     scatter_ctx;
2353   PetscScalar    *array;
2354   PetscInt       *temp_global_dofs;
2355   PetscScalar    globalsum;
2356   PetscInt       i,j,s;
2357   PetscInt       nlocals,first_index,old_index,max_local;
2358   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2359   PetscMPIInt    *dof_sizes,*dof_displs;
2360   PetscBool      first_found;
2361   PetscErrorCode ierr;
2362 
2363   PetscFunctionBegin;
2364   /* mpi buffers */
2365   MPI_Comm_size(comm,&size_prec_comm);
2366   MPI_Comm_rank(comm,&rank_prec_comm);
2367   j = ( !rank_prec_comm ? size_prec_comm : 0);
2368   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2369   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2370   /* get maximum size of subset */
2371   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2372   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2373   max_local = 0;
2374   if (n_local_dofs) {
2375     max_local = temp_global_dofs[0];
2376     for (i=1;i<n_local_dofs;i++) {
2377       if (max_local < temp_global_dofs[i] ) {
2378         max_local = temp_global_dofs[i];
2379       }
2380     }
2381   }
2382   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2383   max_global++;
2384   max_local = 0;
2385   if (n_local_dofs) {
2386     max_local = local_dofs[0];
2387     for (i=1;i<n_local_dofs;i++) {
2388       if (max_local < local_dofs[i] ) {
2389         max_local = local_dofs[i];
2390       }
2391     }
2392   }
2393   max_local++;
2394   /* allocate workspace */
2395   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2396   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2397   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2398   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2399   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2400   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2401   /* create scatter */
2402   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2403   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2404   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2405   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2406   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2407   /* init array */
2408   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2409   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2410   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2411   if (local_dofs_mult) {
2412     for (i=0;i<n_local_dofs;i++) {
2413       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2414     }
2415   } else {
2416     for (i=0;i<n_local_dofs;i++) {
2417       array[local_dofs[i]]=1.0;
2418     }
2419   }
2420   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2421   /* scatter into global vec and get total number of global dofs */
2422   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2423   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2424   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2425   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2426   /* Fill global_vec with cumulative function for global numbering */
2427   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2428   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2429   nlocals = 0;
2430   first_index = -1;
2431   first_found = PETSC_FALSE;
2432   for (i=0;i<s;i++) {
2433     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2434       first_found = PETSC_TRUE;
2435       first_index = i;
2436     }
2437     nlocals += (PetscInt)PetscRealPart(array[i]);
2438   }
2439   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2440   if (!rank_prec_comm) {
2441     dof_displs[0]=0;
2442     for (i=1;i<size_prec_comm;i++) {
2443       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2444     }
2445   }
2446   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2447   if (first_found) {
2448     array[first_index] += (PetscScalar)nlocals;
2449     old_index = first_index;
2450     for (i=first_index+1;i<s;i++) {
2451       if (PetscRealPart(array[i]) > 0.0) {
2452         array[i] += array[old_index];
2453         old_index = i;
2454       }
2455     }
2456   }
2457   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2458   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2459   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2460   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2461   /* get global ordering of local dofs */
2462   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2463   if (local_dofs_mult) {
2464     for (i=0;i<n_local_dofs;i++) {
2465       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2466     }
2467   } else {
2468     for (i=0;i<n_local_dofs;i++) {
2469       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2470     }
2471   }
2472   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2473   /* free workspace */
2474   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2475   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2476   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2477   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2478   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2479   /* return pointer to global ordering of local dofs */
2480   *global_numbering_subset = temp_global_dofs;
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2486 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2487 {
2488   PetscInt       i,j;
2489   PetscScalar    *alphas;
2490   PetscErrorCode ierr;
2491 
2492   PetscFunctionBegin;
2493   /* this implements stabilized Gram-Schmidt */
2494   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2495   for (i=0;i<n;i++) {
2496     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2497     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2498     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2499   }
2500   ierr = PetscFree(alphas);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 /* TODO
2505    - now preallocation is done assuming SEQDENSE local matrices
2506 */
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatISGetMPIXAIJ"
2509 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2510 {
2511   Mat                    new_mat;
2512   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2513   /* info on mat */
2514   /* ISLocalToGlobalMapping rmapping,cmapping; */
2515   PetscInt               bs,rows,cols;
2516   PetscInt               lrows,lcols;
2517   PetscInt               local_rows,local_cols;
2518   PetscBool              isdense;
2519   /* values insertion */
2520   PetscScalar            *array;
2521   PetscInt               *local_indices,*global_indices;
2522   /* work */
2523   PetscInt               i,j,index_row;
2524   PetscErrorCode         ierr;
2525 
2526   PetscFunctionBegin;
2527   /* MISSING CHECKS
2528     - rectangular case not covered (it is not allowed by MATIS)
2529   */
2530   /* get info from mat */
2531   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2532   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2533   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2534   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2535 
2536   /* work */
2537   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2538   for (i=0;i<local_rows;i++) local_indices[i]=i;
2539   /* map indices of local mat to global values */
2540   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2541   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2542   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2543 
2544   if (reuse==MAT_INITIAL_MATRIX) {
2545     Vec         vec_dnz,vec_onz;
2546     PetscScalar *my_dnz,*my_onz;
2547     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2548     PetscInt    index_col,owner;
2549     PetscMPIInt nsubdomains;
2550 
2551     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2552     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2553     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2554     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2555     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2556     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2557     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2558 
2559     /*
2560       preallocation
2561     */
2562 
2563     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2564     /*
2565        Some vectors are needed to sum up properly on shared interface dofs.
2566        Preallocation macros cannot do the job.
2567        Note that preallocation is not exact, since it overestimates nonzeros
2568     */
2569     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2570     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2571     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2572     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2573     /* All processes need to compute entire row ownership */
2574     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2575     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2576     for (i=0;i<nsubdomains;i++) {
2577       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2578         row_ownership[j]=i;
2579       }
2580     }
2581 
2582     /*
2583        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2584        then, they will be summed up properly. This way, preallocation is always sufficient
2585     */
2586     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2587     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2588     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2589     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2590     for (i=0;i<local_rows;i++) {
2591       index_row = global_indices[i];
2592       for (j=i;j<local_rows;j++) {
2593         owner = row_ownership[index_row];
2594         index_col = global_indices[j];
2595         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2596           my_dnz[i] += 1.0;
2597         } else { /* offdiag block */
2598           my_onz[i] += 1.0;
2599         }
2600         /* same as before, interchanging rows and cols */
2601         if (i != j) {
2602           owner = row_ownership[index_col];
2603           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2604             my_dnz[j] += 1.0;
2605           } else {
2606             my_onz[j] += 1.0;
2607           }
2608         }
2609       }
2610     }
2611     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2612     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2613     if (local_rows) { /* multilevel guard */
2614       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2615       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2616     }
2617     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2618     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2619     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2620     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2621     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2622     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2623     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2624 
2625     /* set computed preallocation in dnz and onz */
2626     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2627     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2628     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2629     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2630     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2631     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2632     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2633     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2634 
2635     /* Resize preallocation if overestimated */
2636     for (i=0;i<lrows;i++) {
2637       dnz[i] = PetscMin(dnz[i],lcols);
2638       onz[i] = PetscMin(onz[i],cols-lcols);
2639     }
2640     /* set preallocation */
2641     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2642     for (i=0;i<lrows/bs;i++) {
2643       dnz[i] = dnz[i*bs]/bs;
2644       onz[i] = onz[i*bs]/bs;
2645     }
2646     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2647     for (i=0;i<lrows/bs;i++) {
2648       dnz[i] = dnz[i]-i;
2649     }
2650     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2651     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2652     *M = new_mat;
2653   } else {
2654     PetscInt mbs,mrows,mcols;
2655     /* some checks */
2656     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2657     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2658     if (mrows != rows) {
2659       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2660     }
2661     if (mrows != rows) {
2662       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2663     }
2664     if (mbs != bs) {
2665       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2666     }
2667     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2668   }
2669   /* set local to global mappings */
2670   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2671   /* Set values */
2672   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2673   if (isdense) { /* special case for dense local matrices */
2674     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2675     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2676     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2677     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2678     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2679     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2680   } else { /* very basic values insertion for all other matrix types */
2681     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2682     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2683     for (i=0;i<local_rows;i++) {
2684       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2685       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2686       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2687       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2688       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2689       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2690     }
2691   }
2692   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2693   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2694   if (isdense) {
2695     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2696   }
2697   PetscFunctionReturn(0);
2698 }
2699 
2700 #undef __FUNCT__
2701 #define __FUNCT__ "MatISSubassemble_Private"
2702 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2703 {
2704   Mat             subdomain_adj;
2705   IS              new_ranks,ranks_send_to;
2706   MatPartitioning partitioner;
2707   Mat_IS          *matis;
2708   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2709   PetscInt        prank;
2710   PetscMPIInt     size,rank,color;
2711   PetscInt        *xadj,*adjncy,*oldranks;
2712   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2713   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2714   PetscErrorCode  ierr;
2715   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2716   PetscSubcomm    subcomm;
2717 
2718   PetscFunctionBegin;
2719   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2720   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2721   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2722 
2723   /* Get info on mapping */
2724   matis = (Mat_IS*)(mat->data);
2725   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2726   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2727 
2728   /* build local CSR graph of subdomains' connectivity */
2729   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2730   xadj[0] = 0;
2731   xadj[1] = PetscMax(n_neighs-1,0);
2732   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2733   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2734 
2735   if (threshold) {
2736     PetscInt* count,min_threshold;
2737     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2738     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2739     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2740       for (j=0;j<n_shared[i];j++) {
2741         count[shared[i][j]] += 1;
2742       }
2743     }
2744     /* adapt threshold since we dont want to lose significant connections */
2745     min_threshold = n_neighs;
2746     for (i=1;i<n_neighs;i++) {
2747       for (j=0;j<n_shared[i];j++) {
2748         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2749       }
2750     }
2751     threshold = PetscMax(min_threshold+1,threshold);
2752     xadj[1] = 0;
2753     for (i=1;i<n_neighs;i++) {
2754       for (j=0;j<n_shared[i];j++) {
2755         if (count[shared[i][j]] < threshold) {
2756           adjncy[xadj[1]] = neighs[i];
2757           adjncy_wgt[xadj[1]] = n_shared[i];
2758           xadj[1]++;
2759           break;
2760         }
2761       }
2762     }
2763     ierr = PetscFree(count);CHKERRQ(ierr);
2764   } else {
2765     if (xadj[1]) {
2766       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2767       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2768     }
2769   }
2770   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2771   if (use_square) {
2772     for (i=0;i<xadj[1];i++) {
2773       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2774     }
2775   }
2776   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2777 
2778   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2779 
2780   /*
2781     Restrict work on active processes only.
2782   */
2783   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2784   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2785   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2786   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2787   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2788   if (color) {
2789     ierr = PetscFree(xadj);CHKERRQ(ierr);
2790     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2791     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2792   } else {
2793     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2794     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2795     prank = rank;
2796     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2797     for (i=0;i<size;i++) {
2798       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2799     }
2800     for (i=0;i<xadj[1];i++) {
2801       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2802     }
2803     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2804     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2805     n_subdomains = (PetscInt)size;
2806     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2807 
2808     /* Partition */
2809     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2810     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2811     if (use_vwgt) {
2812       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2813       v_wgt[0] = local_size;
2814       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2815     }
2816     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2817     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2818     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2819     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2820     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2821 
2822     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2823     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2824     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2825     /* clean up */
2826     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2827     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2828     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2829     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2830   }
2831   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2832 
2833   /* assemble parallel IS for sends */
2834   i = 1;
2835   if (color) i=0;
2836   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2837   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2838 
2839   /* get back IS */
2840   *is_sends = ranks_send_to;
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatISSubassemble"
2848 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2849 {
2850   Mat                    local_mat,new_mat;
2851   Mat_IS                 *matis;
2852   IS                     is_sends_internal;
2853   PetscInt               rows,cols;
2854   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2855   PetscBool              ismatis,isdense;
2856   ISLocalToGlobalMapping l2gmap;
2857   PetscInt*              l2gmap_indices;
2858   const PetscInt*        is_indices;
2859   MatType                new_local_type;
2860   MatTypePrivate         new_local_type_private;
2861   /* buffers */
2862   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2863   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2864   /* MPI */
2865   MPI_Comm               comm;
2866   PetscMPIInt            n_sends,n_recvs,commsize;
2867   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2868   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2869   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2870   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2871   PetscErrorCode         ierr;
2872 
2873   PetscFunctionBegin;
2874   /* checks */
2875   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2876   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2877   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2878   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2879   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2880   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2881   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2882   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2883   PetscValidLogicalCollectiveInt(mat,bs,0);
2884   /* prepare IS for sending if not provided */
2885   if (!is_sends) {
2886     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2887     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2888   } else {
2889     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2890     is_sends_internal = is_sends;
2891   }
2892 
2893   /* get pointer of MATIS data */
2894   matis = (Mat_IS*)mat->data;
2895 
2896   /* get comm */
2897   comm = PetscObjectComm((PetscObject)mat);
2898 
2899   /* compute number of sends */
2900   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2901   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2902 
2903   /* compute number of receives */
2904   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2905   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2906   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2907   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2908   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2909   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2910   ierr = PetscFree(iflags);CHKERRQ(ierr);
2911 
2912   /* prepare send/receive buffers */
2913   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2914   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2915   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2916   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2917 
2918   /* Get data from local mat */
2919   if (!isdense) {
2920     /* TODO: See below some guidelines on how to prepare the local buffers */
2921     /*
2922        send_buffer_vals should contain the raw values of the local matrix
2923        send_buffer_idxs should contain:
2924        - MatType_PRIVATE type
2925        - PetscInt        size_of_l2gmap
2926        - PetscInt        global_row_indices[size_of_l2gmap]
2927        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2928     */
2929   } else {
2930     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2931     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2932     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2933     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2934     send_buffer_idxs[1] = i;
2935     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2936     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2937     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2938     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2939     for (i=0;i<n_sends;i++) {
2940       ilengths_vals[is_indices[i]] = len*len;
2941       ilengths_idxs[is_indices[i]] = len+2;
2942     }
2943   }
2944   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2945   buf_size_idxs = 0;
2946   buf_size_vals = 0;
2947   for (i=0;i<n_recvs;i++) {
2948     buf_size_idxs += (PetscInt)olengths_idxs[i];
2949     buf_size_vals += (PetscInt)olengths_vals[i];
2950   }
2951   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2952   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2953 
2954   /* get new tags for clean communications */
2955   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2956   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2957 
2958   /* allocate for requests */
2959   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2960   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2961   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2962   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2963 
2964   /* communications */
2965   ptr_idxs = recv_buffer_idxs;
2966   ptr_vals = recv_buffer_vals;
2967   for (i=0;i<n_recvs;i++) {
2968     source_dest = onodes[i];
2969     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2970     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2971     ptr_idxs += olengths_idxs[i];
2972     ptr_vals += olengths_vals[i];
2973   }
2974   for (i=0;i<n_sends;i++) {
2975     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2976     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2977     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2978   }
2979   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2980   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2981 
2982   /* assemble new l2g map */
2983   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2984   ptr_idxs = recv_buffer_idxs;
2985   buf_size_idxs = 0;
2986   for (i=0;i<n_recvs;i++) {
2987     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2988     ptr_idxs += olengths_idxs[i];
2989   }
2990   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2991   ptr_idxs = recv_buffer_idxs;
2992   buf_size_idxs = 0;
2993   for (i=0;i<n_recvs;i++) {
2994     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2995     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2996     ptr_idxs += olengths_idxs[i];
2997   }
2998   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2999   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3000   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3001 
3002   /* infer new local matrix type from received local matrices type */
3003   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3004   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3005   new_local_type_private = MATAIJ_PRIVATE;
3006   new_local_type = MATSEQAIJ;
3007   if (n_recvs) {
3008     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3009     ptr_idxs = recv_buffer_idxs;
3010     for (i=0;i<n_recvs;i++) {
3011       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3012         new_local_type_private = MATAIJ_PRIVATE;
3013         break;
3014       }
3015       ptr_idxs += olengths_idxs[i];
3016     }
3017     switch (new_local_type_private) {
3018       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3019         new_local_type = MATSEQAIJ;
3020         bs = 1;
3021         break;
3022       case MATAIJ_PRIVATE:
3023         new_local_type = MATSEQAIJ;
3024         bs = 1;
3025         break;
3026       case MATBAIJ_PRIVATE:
3027         new_local_type = MATSEQBAIJ;
3028         break;
3029       case MATSBAIJ_PRIVATE:
3030         new_local_type = MATSEQSBAIJ;
3031         break;
3032       default:
3033         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3034         break;
3035     }
3036   }
3037 
3038   /* create MATIS object */
3039   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3040   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
3041   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3042   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
3043   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3044   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3045 
3046   /* set values */
3047   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3048   ptr_vals = recv_buffer_vals;
3049   ptr_idxs = recv_buffer_idxs;
3050   for (i=0;i<n_recvs;i++) {
3051     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3052       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3053       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3054       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3055       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3056       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3057     }
3058     ptr_idxs += olengths_idxs[i];
3059     ptr_vals += olengths_vals[i];
3060   }
3061   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3062   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3063   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3064   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3065 
3066   { /* check */
3067     Vec       lvec,rvec;
3068     PetscReal infty_error;
3069 
3070     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3071     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3072     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3073     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3074     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3075     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3076     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3077     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3078     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3079   }
3080 
3081   /* free workspace */
3082   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3083   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3084   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3085   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3086   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3087   if (isdense) {
3088     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3089     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3090   } else {
3091     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3092   }
3093   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3094   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3095   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3096   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3097   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3098   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3099   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3100   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3101   ierr = PetscFree(onodes);CHKERRQ(ierr);
3102   /* get back new mat */
3103   *mat_n = new_mat;
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 #undef __FUNCT__
3108 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3109 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3110 {
3111   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3112   PC_IS                  *pcis = (PC_IS*)pc->data;
3113   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3114   MatNullSpace           CoarseNullSpace=NULL;
3115   ISLocalToGlobalMapping coarse_islg;
3116   IS                     coarse_is;
3117   PetscInt               max_it;
3118   PetscInt               im_active=-1,active_procs=-1;
3119   PC                     pc_temp;
3120   PCType                 coarse_pc_type;
3121   KSPType                coarse_ksp_type;
3122   PetscBool              multilevel_requested,multilevel_allowed;
3123   PetscBool              setsym,issym,isbddc,isnn,coarse_reuse;
3124   MatStructure           matstruct;
3125   PetscErrorCode         ierr;
3126 
3127   PetscFunctionBegin;
3128   /* Assign global numbering to coarse dofs */
3129   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3130     PetscInt ocoarse_size;
3131     ocoarse_size = pcbddc->coarse_size;
3132     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3133     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3134     /* see if we can avoid some work */
3135     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3136       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3137         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3138         coarse_reuse = PETSC_FALSE;
3139       } else { /* we can safely reuse already computed coarse matrix */
3140         coarse_reuse = PETSC_TRUE;
3141       }
3142     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3143       coarse_reuse = PETSC_FALSE;
3144     }
3145   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3146     coarse_reuse = PETSC_TRUE;
3147   }
3148 
3149   /* infer some info from user */
3150   issym = PETSC_FALSE;
3151   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3152   multilevel_allowed = PETSC_FALSE;
3153   multilevel_requested = PETSC_FALSE;
3154   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3155   if (multilevel_requested) {
3156     /* count "active processes" */
3157     im_active = !!(pcis->n);
3158     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3159     if (active_procs/pcbddc->coarsening_ratio < 2) {
3160       multilevel_allowed = PETSC_FALSE;
3161     } else {
3162       multilevel_allowed = PETSC_TRUE;
3163     }
3164   }
3165 
3166   /* set defaults for coarse KSP and PC */
3167   if (multilevel_allowed) {
3168     if (issym) {
3169       coarse_ksp_type = KSPRICHARDSON;
3170     } else {
3171       coarse_ksp_type = KSPCHEBYSHEV;
3172     }
3173     coarse_pc_type = PCBDDC;
3174   } else {
3175     coarse_ksp_type = KSPPREONLY;
3176     coarse_pc_type = PCREDUNDANT;
3177   }
3178 
3179   /* create the coarse KSP object only once with defaults */
3180   if (!pcbddc->coarse_ksp) {
3181     char prefix[256],str_level[3];
3182     size_t len;
3183     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3184     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3185     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3186     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3187     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3188     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3189     /* prefix */
3190     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3191     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3192     if (!pcbddc->current_level) {
3193       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3194       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3195     } else {
3196       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3197       if (pcbddc->current_level>1) len -= 2;
3198       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3199       *(prefix+len)='\0';
3200       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3201       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3202     }
3203     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3204   }
3205   /* allow user customization */
3206   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3207   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3208   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3209 
3210   /* get some info after set from options */
3211   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3212   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3213   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3214   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3215     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3216     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3217     isbddc = PETSC_FALSE;
3218   }
3219 
3220   /* propagate BDDC info to the next level */
3221   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3222   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3223   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3224 
3225   /* creates temporary MATIS object for coarse matrix */
3226   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3227   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3228   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3229   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3230   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3231   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3232   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3233   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3234   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3235 
3236   /* assemble coarse matrix */
3237   if (isbddc || isnn) {
3238     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3239   } else {
3240     if (coarse_reuse) {
3241       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3242       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3243       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3244     } else {
3245       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3246     }
3247   }
3248   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3249 
3250   /* create local to global scatters for coarse problem */
3251   if (pcbddc->new_primal_space) {
3252     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3253     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3254     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3255     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3256     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3257   }
3258   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3259 
3260   /* propagate symmetry info to coarse matrix */
3261   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3262 
3263   /* Compute coarse null space (special handling by BDDC only) */
3264   if (pcbddc->NullSpace) {
3265     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3266     if (isbddc) {
3267       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3268     } else {
3269       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3270     }
3271   }
3272 
3273   /* set operators */
3274   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3275   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3276 
3277   /* additional KSP customization */
3278   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3279   if (max_it < 5) {
3280     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3281   }
3282   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3283 
3284 
3285   /* print some info if requested */
3286   if (pcbddc->dbg_flag) {
3287     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3288     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3289     if (!multilevel_allowed) {
3290       if (multilevel_requested) {
3291         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3292       } else if (pcbddc->max_levels) {
3293         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3294       }
3295     }
3296     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3297     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3298   }
3299 
3300   /* setup coarse ksp */
3301   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3302   if (pcbddc->dbg_flag) {
3303     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3304     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3305     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3306     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3307   }
3308 
3309   /* Check coarse problem if requested */
3310   if (pcbddc->dbg_flag) {
3311     KSP       check_ksp;
3312     KSPType   check_ksp_type;
3313     PC        check_pc;
3314     Vec       check_vec;
3315     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3316     PetscInt  its;
3317     PetscBool ispreonly,compute;
3318 
3319     /* Create ksp object suitable for estimation of extreme eigenvalues */
3320     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3321     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3322     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3323     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3324     if (ispreonly) {
3325       check_ksp_type = KSPPREONLY;
3326       compute = PETSC_FALSE;
3327     } else {
3328       if (issym) check_ksp_type = KSPCG;
3329       else check_ksp_type = KSPGMRES;
3330       compute = PETSC_TRUE;
3331     }
3332     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3333     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3334     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3335     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3336     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3337     /* create random vec */
3338     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3339     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3340     if (CoarseNullSpace) {
3341       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3342     }
3343     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3344     /* solve coarse problem */
3345     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3346     if (CoarseNullSpace) {
3347       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3348     }
3349     /* check coarse problem residual error */
3350     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3351     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3352     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3353     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3354     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3355     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3356     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3357     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3358     /* get eigenvalue estimation if preonly has not been requested */
3359     if (!ispreonly) {
3360       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3361       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3362       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3363     }
3364     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3365     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3366   }
3367   /* free memory */
3368   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3369   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3370   PetscFunctionReturn(0);
3371 }
3372 
3373 #undef __FUNCT__
3374 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3375 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3376 {
3377   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3378   PC_IS*         pcis = (PC_IS*)pc->data;
3379   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3380   PetscInt       i,coarse_size;
3381   PetscInt       *local_primal_indices;
3382   PetscErrorCode ierr;
3383 
3384   PetscFunctionBegin;
3385   /* Compute global number of coarse dofs */
3386   if (!pcbddc->primal_indices_local_idxs) {
3387     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
3388   }
3389   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3390 
3391   /* check numbering */
3392   if (pcbddc->dbg_flag) {
3393     PetscScalar coarsesum,*array;
3394 
3395     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3396     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3397     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3398     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3399     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3400     for (i=0;i<pcbddc->local_primal_size;i++) {
3401       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3402     }
3403     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3404     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3405     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3406     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3407     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3408     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3409     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3410     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3411     for (i=0;i<pcis->n;i++) {
3412       if (array[i] == 1.0) {
3413         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3414       }
3415     }
3416     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3417     for (i=0;i<pcis->n;i++) {
3418       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3419     }
3420     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3421     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3422     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3423     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3424     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3425     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3426     if (pcbddc->dbg_flag > 1) {
3427       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3428       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3429       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3430       for (i=0;i<pcbddc->local_primal_size;i++) {
3431         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3432       }
3433       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3434     }
3435     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3436   }
3437   /* get back data */
3438   *coarse_size_n = coarse_size;
3439   *local_primal_indices_n = local_primal_indices;
3440   PetscFunctionReturn(0);
3441 }
3442 
3443