xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 68457ee5e567a733bb3ba02714aa90a4591afcc7)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
111   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118 {
119   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
120   PC_IS          *pcis = (PC_IS*)pc->data;
121   VecType        impVecType;
122   PetscInt       n_constraints,n_R,old_size;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   if (!pcbddc->ConstraintMatrix) {
127     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128   }
129   /* get sizes */
130   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
131   n_R = pcis->n-pcbddc->n_actual_vertices;
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   /* local work vectors (try to avoid unneeded work)*/
134   /* R nodes */
135   old_size = -1;
136   if (pcbddc->vec1_R) {
137     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
138   }
139   if (n_R != old_size) {
140     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
146   }
147   /* local primal dofs */
148   old_size = -1;
149   if (pcbddc->vec1_P) {
150     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
151   }
152   if (pcbddc->local_primal_size != old_size) {
153     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
156     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
157   }
158   /* local explicit constraints */
159   old_size = -1;
160   if (pcbddc->vec1_C) {
161     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
162   }
163   if (n_constraints && n_constraints != old_size) {
164     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
167     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCSetUpCorrection"
174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175 {
176   PetscErrorCode         ierr;
177   /* pointers to pcis and pcbddc */
178   PC_IS*                 pcis = (PC_IS*)pc->data;
179   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180   /* submatrices of local problem */
181   Mat                    A_RV,A_VR,A_VV;
182   /* working matrices */
183   Mat                    M1,M2,M3,C_CR;
184   /* working vectors */
185   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186   /* additional working stuff */
187   IS                     is_aux;
188   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
189   const PetscScalar      *array,*row_cmat_values;
190   const PetscInt         *row_cmat_indices,*idx_R_local;
191   PetscInt               *idx_V_B,*auxindices;
192   PetscInt               n_vertices,n_constraints,size_of_constraint;
193   PetscInt               i,j,n_R,n_D,n_B;
194   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
195   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
196   MatType                impMatType;
197   /* some shortcuts to scalars */
198   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
199   /* for debugging purposes */
200   PetscReal              *coarsefunctions_errors,*constraints_errors;
201 
202   PetscFunctionBegin;
203   /* get number of vertices (corners plus constraints with change of basis)
204      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
205   n_vertices = pcbddc->n_actual_vertices;
206   n_constraints = pcbddc->local_primal_size-n_vertices;
207   /* Set Non-overlapping dimensions */
208   n_B = pcis->n_B; n_D = pcis->n - n_B;
209   n_R = pcis->n-n_vertices;
210 
211   /* Set types for local objects needed by BDDC precondtioner */
212   impMatType = MATSEQDENSE;
213 
214   /* Allocating some extra storage just to be safe */
215   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
216   for (i=0;i<pcis->n;i++) auxindices[i]=i;
217 
218   /* vertices in boundary numbering */
219   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
220   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
221   if (i != n_vertices) {
222     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
223   }
224 
225   /* Precompute stuffs needed for preprocessing and application of BDDC*/
226   if (n_constraints) {
227     /* see if we can save some allocations */
228     if (pcbddc->local_auxmat2) {
229       PetscInt on_R,on_constraints;
230       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
231       if (on_R != n_R || on_constraints != n_constraints) {
232         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
233         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
234       }
235     }
236     /* work vectors */
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
239     /* auxiliary matrices */
240     if (!pcbddc->local_auxmat2) {
241       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
242       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
243       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
244       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
245     }
246 
247     /* Extract constraints on R nodes: C_{CR}  */
248     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
249     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
250     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
251 
252     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
253     for (i=0;i<n_constraints;i++) {
254       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
255       /* Get row of constraint matrix in R numbering */
256       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
257       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
258       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
260       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
261       /* Solve for row of constraint matrix in R numbering */
262       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
263       /* Set values in local_auxmat2 */
264       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
265       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
266       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267     }
268     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
271 
272     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
273     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
274     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
275     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
276     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
277     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(M1);CHKERRQ(ierr);
279     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
280     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
281     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
282     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
283     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
284     ierr = MatDestroy(&M2);CHKERRQ(ierr);
285     ierr = MatDestroy(&M3);CHKERRQ(ierr);
286     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
287     if (!pcbddc->local_auxmat1) {
288       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
289     } else {
290       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     }
292   }
293 
294   /* Get submatrices from subdomain matrix */
295   if (n_vertices) {
296     PetscInt ibs,mbs;
297     PetscBool issbaij;
298     Mat newmat;
299 
300     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
301     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
302     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
303     if (ibs != mbs) { /* need to convert to SEQAIJ */
304       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
308       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
309     } else {
310       /* this is safe */
311       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
313       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
314         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
315         /* which of the two approaches is faster? */
316         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
317         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
318         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
320         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
321       } else {
322         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
323         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324       }
325     }
326     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
327     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
328     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
329   }
330 
331   /* Matrix of coarse basis functions (local) */
332   if (pcbddc->coarse_phi_B) {
333     PetscInt on_B,on_primal;
334     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
335     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
336       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
337       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
338     }
339   }
340   if (pcbddc->coarse_phi_D) {
341     PetscInt on_D,on_primal;
342     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
343     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
344       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
345       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
346     }
347   }
348   if (!pcbddc->coarse_phi_B) {
349     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
350     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
351     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
352     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
353   }
354   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
355     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
356     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
357     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
358     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
359   }
360 
361   if (pcbddc->dbg_flag) {
362     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
363     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
364     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
365   }
366   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
367   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
368 
369   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
370 
371   /* vertices */
372   for (i=0;i<n_vertices;i++) {
373     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
374     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
375     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
376     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
377     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
378     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
379     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
380     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
383     if (n_constraints) {
384       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
385       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
387     }
388     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
389     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
390 
391     /* Set values in coarse basis function and subdomain part of coarse_mat */
392     /* coarse basis functions */
393     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
394     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
397     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
398     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
400     if (pcbddc->switch_static || pcbddc->dbg_flag) {
401       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
404       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
405       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406     }
407     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
408     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
409     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
410     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     if (n_constraints) {
412       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
413       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
414       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     }
416 
417     /* check */
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on local nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
425       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
426       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
427       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
428       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
429       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       if (n_constraints) {
433         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       }
437       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
440       /* check saddle point solution */
441       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
444       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
445       /* shift by the identity matrix */
446       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
447       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
450     }
451   }
452 
453   /* constraints */
454   for (i=0;i<n_constraints;i++) {
455     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
456     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
457     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
458     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
459     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
460     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
461     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
462     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
463     if (n_vertices) {
464       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
465     }
466     /* Set values in coarse basis function and subdomain part of coarse_mat */
467     /* coarse basis functions */
468     j = i+n_vertices; /* don't touch this! */
469     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
470     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
473     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
474     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
475     if (pcbddc->switch_static || pcbddc->dbg_flag) {
476       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
479       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
481     }
482     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
483     if (n_vertices) {
484       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
485       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
487     }
488     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
489     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
490     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
491 
492     if (pcbddc->dbg_flag) {
493       /* assemble subdomain vector on nodes */
494       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
495       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
496       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
497       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
498       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
499       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
500       /* assemble subdomain vector of lagrange multipliers */
501       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
502       if (n_vertices) {
503         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
504         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
505         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
506       }
507       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
508       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
509       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
510       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
513       /* check saddle point solution */
514       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
517       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
518       /* shift by the identity matrix */
519       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
520       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
523     }
524   }
525   /* call assembling routines for local coarse basis */
526   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   if (pcbddc->switch_static || pcbddc->dbg_flag) {
529     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   }
532 
533   /* compute other basis functions for non-symmetric problems */
534   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
535   if (!setsym || (setsym && !issym)) {
536     if (!pcbddc->coarse_psi_B) {
537       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
538       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
539       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
540       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
541     }
542     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
543       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
544       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
545       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
546       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
547     }
548     for (i=0;i<pcbddc->local_primal_size;i++) {
549       if (n_constraints) {
550         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
551         for (j=0;j<n_constraints;j++) {
552           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
553         }
554         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
555         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
556       }
557       if (i<n_vertices) {
558         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
559         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
560         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
561         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
562         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
563         if (n_constraints) {
564           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
565         }
566       } else {
567         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
568       }
569       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
570       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
571       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
574       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
575       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
576       if (i<n_vertices) {
577         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
578       }
579       if (pcbddc->switch_static || pcbddc->dbg_flag) {
580         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
583         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
584         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
585       }
586 
587       if (pcbddc->dbg_flag) {
588         /* assemble subdomain vector on nodes */
589         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
590         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
591         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
592         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
593         if (i<n_vertices) {
594           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
595         }
596         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
597         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
598         /* assemble subdomain vector of lagrange multipliers */
599         for (j=0;j<pcbddc->local_primal_size;j++) {
600           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
601         }
602         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
603         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
604         /* check saddle point solution */
605         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
608         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
609         /* shift by the identity matrix */
610         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
611         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
614       }
615     }
616     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     if (pcbddc->switch_static || pcbddc->dbg_flag) {
619       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     }
622     unsymmetric_check = PETSC_TRUE;
623   } else { /* take references to already computed coarse basis */
624     unsymmetric_check = PETSC_FALSE;
625     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
626     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
627     if (pcbddc->coarse_phi_D) {
628       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
629       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
630     }
631   }
632   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
633   /* Checking coarse_sub_mat and coarse basis functios */
634   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
635   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636   if (pcbddc->dbg_flag) {
637     Mat         coarse_sub_mat;
638     Mat         AUXMAT,TM1,TM2,TM3,TM4;
639     Mat         coarse_phi_D,coarse_phi_B;
640     Mat         coarse_psi_D,coarse_psi_B;
641     Mat         A_II,A_BB,A_IB,A_BI;
642     MatType     checkmattype=MATSEQAIJ;
643     PetscReal   real_value;
644 
645     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
646     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
647     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
648     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
649     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
650     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
651     if (unsymmetric_check) {
652       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
653       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
654     }
655     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
656 
657     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
658     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
659     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
660     if (unsymmetric_check) {
661       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
662       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
663       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
664       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
665       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
666       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
667       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
668       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
669       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
670       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
671       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
672       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
673     } else {
674       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
675       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
676       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
677       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
678       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
679       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
680       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
681       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
682     }
683     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
684     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
685     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
686     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
687     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
688     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
689     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
690     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
691     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
692     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
693     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
694     for (i=0;i<pcbddc->local_primal_size;i++) {
695       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
696     }
697     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
698     for (i=0;i<pcbddc->local_primal_size;i++) {
699       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
700     }
701     if (unsymmetric_check) {
702       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
703       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
704         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
705       }
706       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
707       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
708         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
709       }
710     }
711     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
712     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
713     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
714     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
715     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
716     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
717     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
718     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
719     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
720     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
721     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
722     if (unsymmetric_check) {
723       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
724       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
725     }
726     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
727     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
728     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
729     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
730   }
731   /* free memory */
732   if (n_vertices) {
733     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
734     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
735     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
737     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
738   }
739   if (n_constraints) {
740     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
741     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
742     ierr = MatDestroy(&M1);CHKERRQ(ierr);
743     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
744   }
745   ierr = PetscFree(auxindices);CHKERRQ(ierr);
746   /* get back data */
747   *coarse_submat_vals_n = coarse_submat_vals;
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
753 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
754 {
755   PC_IS*            pcis = (PC_IS*)(pc->data);
756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
758   PetscBool         issbaij,isseqaij;
759   /* manage repeated solves */
760   MatReuse          reuse;
761   MatStructure      matstruct;
762   PetscErrorCode    ierr;
763 
764   PetscFunctionBegin;
765   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
766     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
767   }
768   /* get mat flags */
769   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
770   reuse = MAT_INITIAL_MATRIX;
771   if (pc->setupcalled) {
772     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
773     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
774     if (matstruct == SAME_NONZERO_PATTERN) {
775       reuse = MAT_REUSE_MATRIX;
776     } else {
777       reuse = MAT_INITIAL_MATRIX;
778     }
779   }
780   if (reuse == MAT_INITIAL_MATRIX) {
781     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
782     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
783     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
784     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
785     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
786   }
787 
788   /* transform local matrices if needed */
789   if (pcbddc->use_change_of_basis) {
790     Mat         change_mat_all;
791     PetscScalar *row_cmat_values;
792     PetscInt    *row_cmat_indices;
793     PetscInt    *nnz,*is_indices,*temp_indices;
794     PetscInt    i,j,k,n_D,n_B;
795 
796     /* Get Non-overlapping dimensions */
797     n_B = pcis->n_B;
798     n_D = pcis->n-n_B;
799 
800     /* compute nonzero structure of change of basis on all local nodes */
801     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
802     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
803     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
804     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
805     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
806     k=1;
807     for (i=0;i<n_B;i++) {
808       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
809       nnz[is_indices[i]]=j;
810       if (k < j) k = j;
811       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
812     }
813     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
814     /* assemble change of basis matrix on the whole set of local dofs */
815     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
816     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
817     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
818     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
819     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
820     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
821     for (i=0;i<n_D;i++) {
822       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
823     }
824     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
825     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
826     for (i=0;i<n_B;i++) {
827       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
828       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
829       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
830       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
831     }
832     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
833     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
834     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
835     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
836     if (isseqaij) {
837       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
838     } else {
839       Mat work_mat;
840       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
841       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
842       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
843     }
844     /*
845     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
846     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
847     */
848     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
849     ierr = PetscFree(nnz);CHKERRQ(ierr);
850     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
851   } else {
852     /* without change of basis, the local matrix is unchanged */
853     if (!pcbddc->local_mat) {
854       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
855       pcbddc->local_mat = matis->A;
856     }
857   }
858 
859   /* get submatrices */
860   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
861   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
862   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
863   if (!issbaij) {
864     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
865     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
866   } else {
867     Mat newmat;
868     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
869     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
870     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
871     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
872   }
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
878 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
879 {
880   PC_IS*         pcis = (PC_IS*)(pc->data);
881   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
882   IS             is_aux1,is_aux2;
883   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
884   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
885   PetscInt       vbs,bs;
886   PetscBT        bitmask;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   /*
891     No need to setup local scatters if
892       - primal space is unchanged
893         AND
894       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
895         AND
896       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
897   */
898   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
899     PetscFunctionReturn(0);
900   }
901   /* destroy old objects */
902   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
903   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
904   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
905   /* Set Non-overlapping dimensions */
906   n_B = pcis->n_B; n_D = pcis->n - n_B;
907   n_vertices = pcbddc->n_actual_vertices;
908   /* create auxiliary bitmask */
909   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
910   for (i=0;i<n_vertices;i++) {
911     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
912   }
913 
914   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
915   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
916   for (i=0, n_R=0; i<pcis->n; i++) {
917     if (!PetscBTLookup(bitmask,i)) {
918       idx_R_local[n_R] = i;
919       n_R++;
920     }
921   }
922 
923   /* Block code */
924   vbs = 1;
925   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
926   if (bs>1 && !(n_vertices%bs)) {
927     PetscBool is_blocked = PETSC_TRUE;
928     PetscInt  *vary;
929     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
930     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
931     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
932     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
933     for (i=0; i<n_vertices; i++) {
934       if (vary[i]!=0 && vary[i]!=bs) {
935         is_blocked = PETSC_FALSE;
936         break;
937       }
938     }
939     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
940       vbs = bs;
941       for (i=0;i<n_R/vbs;i++) {
942         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
943       }
944     }
945     ierr = PetscFree(vary);CHKERRQ(ierr);
946   }
947   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
948   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
949 
950   /* print some info if requested */
951   if (pcbddc->dbg_flag) {
952     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
953     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
954     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
955     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
956     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
957     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
958     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
959     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
960   }
961 
962   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
963   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
964   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
965   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
966   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
967   for (i=0; i<n_D; i++) {
968     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
969   }
970   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
971   for (i=0, j=0; i<n_R; i++) {
972     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
973       aux_array1[j++] = i;
974     }
975   }
976   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
977   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
978   for (i=0, j=0; i<n_B; i++) {
979     if (!PetscBTLookup(bitmask,is_indices[i])) {
980       aux_array2[j++] = i;
981     }
982   }
983   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
984   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
985   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
986   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
987   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
988 
989   if (pcbddc->switch_static || pcbddc->dbg_flag) {
990     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
991     for (i=0, j=0; i<n_R; i++) {
992       if (PetscBTLookup(bitmask,idx_R_local[i])) {
993         aux_array1[j++] = i;
994       }
995     }
996     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
997     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
998     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
999   }
1000   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1001   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1008 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1009 {
1010   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1011   PC_IS          *pcis = (PC_IS*)pc->data;
1012   PC             pc_temp;
1013   Mat            A_RR;
1014   MatStructure   matstruct;
1015   MatReuse       reuse;
1016   PetscScalar    m_one = -1.0;
1017   PetscReal      value;
1018   PetscInt       n_D,n_R,ibs,mbs;
1019   PetscBool      use_exact,use_exact_reduced,issbaij;
1020   PetscErrorCode ierr;
1021   /* prefixes stuff */
1022   char           dir_prefix[256],neu_prefix[256],str_level[16];
1023   size_t         len;
1024 
1025   PetscFunctionBegin;
1026   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1027 
1028   /* compute prefixes */
1029   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1030   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1031   if (!pcbddc->current_level) {
1032     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1033     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1034     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1035     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1036   } else {
1037     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1038     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1039     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1040     len -= 15; /* remove "pc_bddc_coarse_" */
1041     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1042     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1043     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1044     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1045     *(dir_prefix+len)='\0';
1046     *(neu_prefix+len)='\0';
1047     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1048     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1049     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1050     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1051   }
1052 
1053   /* DIRICHLET PROBLEM */
1054   /* Matrix for Dirichlet problem is pcis->A_II */
1055   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1056   if (!pcbddc->ksp_D) { /* create object if not yet build */
1057     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1058     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1059     /* default */
1060     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1061     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1062     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1063     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1064     if (issbaij) {
1065       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1066     } else {
1067       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1068     }
1069     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1070     /* Allow user's customization */
1071     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1072   }
1073   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1074   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1075   if (!n_D) {
1076     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1077     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1078   }
1079   /* Set Up KSP for Dirichlet problem of BDDC */
1080   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1081   /* set ksp_D into pcis data */
1082   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1083   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1084   pcis->ksp_D = pcbddc->ksp_D;
1085 
1086   /* NEUMANN PROBLEM */
1087   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1088   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1089   if (pcbddc->ksp_R) { /* already created ksp */
1090     PetscInt nn_R;
1091     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1092     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1093     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1094     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1095       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1096       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1097       reuse = MAT_INITIAL_MATRIX;
1098     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1099       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1100         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1101         reuse = MAT_INITIAL_MATRIX;
1102       } else { /* safe to reuse the matrix */
1103         reuse = MAT_REUSE_MATRIX;
1104       }
1105     }
1106     /* last check */
1107     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1108       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1109       reuse = MAT_INITIAL_MATRIX;
1110     }
1111   } else { /* first time, so we need to create the matrix */
1112     reuse = MAT_INITIAL_MATRIX;
1113   }
1114   /* extract A_RR */
1115   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1116   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1117   if (ibs != mbs) {
1118     Mat newmat;
1119     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1120     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1121     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1122   } else {
1123     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1124   }
1125   if (!pcbddc->ksp_R) { /* create object if not present */
1126     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1127     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1128     /* default */
1129     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1130     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1131     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1132     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1133     if (issbaij) {
1134       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1135     } else {
1136       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1137     }
1138     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1139     /* Allow user's customization */
1140     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1141   }
1142   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1143   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1144   if (!n_R) {
1145     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1146     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1147   }
1148   /* Set Up KSP for Neumann problem of BDDC */
1149   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1150 
1151   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1152   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1153     /* Dirichlet */
1154     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1155     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1156     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1157     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1158     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1159     /* need to be adapted? */
1160     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1161     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1162     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1163     /* print info */
1164     if (pcbddc->dbg_flag) {
1165       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1166       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1167       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1168       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1169       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1170       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1171     }
1172     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1173       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1174     }
1175 
1176     /* Neumann */
1177     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1178     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1179     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1180     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1181     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1182     /* need to be adapted? */
1183     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1184     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1185     /* print info */
1186     if (pcbddc->dbg_flag) {
1187       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1188       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1189     }
1190     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1191       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1192     }
1193   }
1194   /* free Neumann problem's matrix */
1195   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1201 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1202 {
1203   PetscErrorCode ierr;
1204   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1205 
1206   PetscFunctionBegin;
1207   if (applytranspose) {
1208     if (pcbddc->local_auxmat1) {
1209       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1210       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1211     }
1212     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1213   } else {
1214     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1215     if (pcbddc->local_auxmat1) {
1216       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1217       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1218     }
1219   }
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1224 #undef __FUNCT__
1225 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1226 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1227 {
1228   PetscErrorCode ierr;
1229   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1230   PC_IS*            pcis = (PC_IS*)  (pc->data);
1231   const PetscScalar zero = 0.0;
1232 
1233   PetscFunctionBegin;
1234   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1235   if (applytranspose) {
1236     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1237     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1238   } else {
1239     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1240     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1241   }
1242   /* start communications from local primal nodes to rhs of coarse solver */
1243   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1244   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1248   /* TODO remove null space when doing multilevel */
1249   if (pcbddc->coarse_ksp) {
1250     if (applytranspose) {
1251       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1252     } else {
1253       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1254     }
1255   }
1256   /* start communications from coarse solver solution to local primal nodes */
1257   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258 
1259   /* Local solution on R nodes */
1260   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1261   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1262   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1263   if (pcbddc->switch_static) {
1264     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1265     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1266   }
1267   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1268   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1269   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1271   if (pcbddc->switch_static) {
1272     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1273     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1274   }
1275 
1276   /* complete communications from coarse sol to local primal nodes */
1277   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1278 
1279   /* Sum contributions from two levels */
1280   if (applytranspose) {
1281     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1282     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1283   } else {
1284     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1285     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1286   }
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1291 #undef __FUNCT__
1292 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1293 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1294 {
1295   PetscErrorCode ierr;
1296   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1297   PetscScalar    *array,*array2;
1298   Vec            from,to;
1299 
1300   PetscFunctionBegin;
1301   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1302     from = pcbddc->coarse_vec;
1303     to = pcbddc->vec1_P;
1304     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1305       Vec tvec;
1306       PetscInt lsize;
1307       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1308       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1309       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1310       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1311       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1312       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1313       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1314     }
1315   } else { /* from local to global -> put data in coarse right hand side */
1316     from = pcbddc->vec1_P;
1317     to = pcbddc->coarse_vec;
1318   }
1319   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1325 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1326 {
1327   PetscErrorCode ierr;
1328   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1329   PetscScalar    *array,*array2;
1330   Vec            from,to;
1331 
1332   PetscFunctionBegin;
1333   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1334     from = pcbddc->coarse_vec;
1335     to = pcbddc->vec1_P;
1336   } else { /* from local to global -> put data in coarse right hand side */
1337     from = pcbddc->vec1_P;
1338     to = pcbddc->coarse_vec;
1339   }
1340   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1341   if (smode == SCATTER_FORWARD) {
1342     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1343       Vec tvec;
1344       PetscInt lsize;
1345       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1346       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1347       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1348       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1349       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1350       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1351       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1352     }
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 /* uncomment for testing purposes */
1358 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1359 #undef __FUNCT__
1360 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1361 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1362 {
1363   PetscErrorCode    ierr;
1364   PC_IS*            pcis = (PC_IS*)(pc->data);
1365   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1366   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1367   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1368   MatType           impMatType=MATSEQAIJ;
1369   /* one and zero */
1370   PetscScalar       one=1.0,zero=0.0;
1371   /* space to store constraints and their local indices */
1372   PetscScalar       *temp_quadrature_constraint;
1373   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1374   /* iterators */
1375   PetscInt          i,j,k,total_counts,temp_start_ptr;
1376   /* stuff to store connected components stored in pcbddc->mat_graph */
1377   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1378   PetscInt          n_ISForFaces,n_ISForEdges;
1379   /* near null space stuff */
1380   MatNullSpace      nearnullsp;
1381   const Vec         *nearnullvecs;
1382   Vec               *localnearnullsp;
1383   PetscBool         nnsp_has_cnst;
1384   PetscInt          nnsp_size;
1385   PetscScalar       *array;
1386   /* BLAS integers */
1387   PetscBLASInt      lwork,lierr;
1388   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1389   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1390   /* LAPACK working arrays for SVD or POD */
1391   PetscBool         skip_lapack;
1392   PetscScalar       *work;
1393   PetscReal         *singular_vals;
1394 #if defined(PETSC_USE_COMPLEX)
1395   PetscReal         *rwork;
1396 #endif
1397 #if defined(PETSC_MISSING_LAPACK_GESVD)
1398   PetscBLASInt      Blas_one_2=1;
1399   PetscScalar       *temp_basis,*correlation_mat;
1400 #else
1401   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1402   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1403 #endif
1404   /* reuse */
1405   PetscInt          olocal_primal_size;
1406   PetscInt          *oprimal_indices_local_idxs;
1407   /* change of basis */
1408   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1409   PetscBool         boolforchange,qr_needed;
1410   PetscBT           touched,change_basis,qr_needed_idx;
1411   /* auxiliary stuff */
1412   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1413   /* some quantities */
1414   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1415   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1416 
1417 
1418   PetscFunctionBegin;
1419   /* Destroy Mat objects computed previously */
1420   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1421   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1422   /* Get index sets for faces, edges and vertices from graph */
1423   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1424     pcbddc->use_vertices = PETSC_TRUE;
1425   }
1426   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1427   /* HACKS (the two following code branches) */
1428   if (!ISForVertices && pcbddc->NullSpace) {
1429     pcbddc->use_change_of_basis = PETSC_TRUE;
1430     pcbddc->use_change_on_faces = PETSC_FALSE;
1431   }
1432   if (pcbddc->NullSpace) {
1433     /* use_change_of_basis should be consistent among processors */
1434     PetscBool tbool = pcbddc->use_change_of_basis;
1435     ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1436   }
1437   /* print some info */
1438   if (pcbddc->dbg_flag) {
1439     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1440     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1441     i = 0;
1442     if (ISForVertices) {
1443       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1444     }
1445     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1446     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1447     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1448     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1449   }
1450   /* check if near null space is attached to global mat */
1451   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1452   if (nearnullsp) {
1453     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1454     /* remove any stored info */
1455     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1456     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1457     /* store information for BDDC solver reuse */
1458     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1459     pcbddc->onearnullspace = nearnullsp;
1460     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1461     for (i=0;i<nnsp_size;i++) {
1462       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1463     }
1464   } else { /* if near null space is not provided BDDC uses constants by default */
1465     nnsp_size = 0;
1466     nnsp_has_cnst = PETSC_TRUE;
1467   }
1468   /* get max number of constraints on a single cc */
1469   max_constraints = nnsp_size;
1470   if (nnsp_has_cnst) max_constraints++;
1471 
1472   /*
1473        Evaluate maximum storage size needed by the procedure
1474        - temp_indices will contain start index of each constraint stored as follows
1475        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1476        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1477        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1478                                                                                                                                                          */
1479   total_counts = n_ISForFaces+n_ISForEdges;
1480   total_counts *= max_constraints;
1481   n_vertices = 0;
1482   if (ISForVertices) {
1483     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1484   }
1485   total_counts += n_vertices;
1486   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1487   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1488   total_counts = 0;
1489   max_size_of_constraint = 0;
1490   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1491     if (i<n_ISForEdges) {
1492       used_IS = &ISForEdges[i];
1493     } else {
1494       used_IS = &ISForFaces[i-n_ISForEdges];
1495     }
1496     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1497     total_counts += j;
1498     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1499   }
1500   total_counts *= max_constraints;
1501   total_counts += n_vertices;
1502   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1503   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1504   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1505   /* get local part of global near null space vectors */
1506   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1507   for (k=0;k<nnsp_size;k++) {
1508     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1509     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511   }
1512 
1513   /* whether or not to skip lapack calls */
1514   skip_lapack = PETSC_TRUE;
1515   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1516 
1517   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1518   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1519     PetscScalar temp_work;
1520 #if defined(PETSC_MISSING_LAPACK_GESVD)
1521     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1522     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1523     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1524     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1525 #if defined(PETSC_USE_COMPLEX)
1526     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1527 #endif
1528     /* now we evaluate the optimal workspace using query with lwork=-1 */
1529     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1530     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1531     lwork = -1;
1532     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1533 #if !defined(PETSC_USE_COMPLEX)
1534     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1535 #else
1536     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1537 #endif
1538     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1539     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1540 #else /* on missing GESVD */
1541     /* SVD */
1542     PetscInt max_n,min_n;
1543     max_n = max_size_of_constraint;
1544     min_n = max_constraints;
1545     if (max_size_of_constraint < max_constraints) {
1546       min_n = max_size_of_constraint;
1547       max_n = max_constraints;
1548     }
1549     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1550 #if defined(PETSC_USE_COMPLEX)
1551     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1552 #endif
1553     /* now we evaluate the optimal workspace using query with lwork=-1 */
1554     lwork = -1;
1555     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1556     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1557     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1558     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1559 #if !defined(PETSC_USE_COMPLEX)
1560     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1561 #else
1562     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1563 #endif
1564     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1565     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1566 #endif /* on missing GESVD */
1567     /* Allocate optimal workspace */
1568     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1569     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1570   }
1571   /* Now we can loop on constraining sets */
1572   total_counts = 0;
1573   temp_indices[0] = 0;
1574   /* vertices */
1575   if (ISForVertices) {
1576     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1577     if (nnsp_has_cnst) { /* consider all vertices */
1578       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1579       for (i=0;i<n_vertices;i++) {
1580         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1581         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1582         total_counts++;
1583       }
1584     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1585       PetscBool used_vertex;
1586       for (i=0;i<n_vertices;i++) {
1587         used_vertex = PETSC_FALSE;
1588         k = 0;
1589         while (!used_vertex && k<nnsp_size) {
1590           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1591           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1592             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1593             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1594             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1595             total_counts++;
1596             used_vertex = PETSC_TRUE;
1597           }
1598           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1599           k++;
1600         }
1601       }
1602     }
1603     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1604     n_vertices = total_counts;
1605   }
1606 
1607   /* edges and faces */
1608   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1609     if (i<n_ISForEdges) {
1610       used_IS = &ISForEdges[i];
1611       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1612     } else {
1613       used_IS = &ISForFaces[i-n_ISForEdges];
1614       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1615     }
1616     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1617     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1618     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1619     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1620     /* change of basis should not be performed on local periodic nodes */
1621     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1622     if (nnsp_has_cnst) {
1623       PetscScalar quad_value;
1624       temp_constraints++;
1625       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1626       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1627       for (j=0;j<size_of_constraint;j++) {
1628         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1629       }
1630       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1631       total_counts++;
1632     }
1633     for (k=0;k<nnsp_size;k++) {
1634       PetscReal real_value;
1635       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1636       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1637       for (j=0;j<size_of_constraint;j++) {
1638         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1639       }
1640       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1641       /* check if array is null on the connected component */
1642       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1643       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1644       if (real_value > 0.0) { /* keep indices and values */
1645         temp_constraints++;
1646         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1647         total_counts++;
1648       }
1649     }
1650     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1651     valid_constraints = temp_constraints;
1652     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1653     if (!pcbddc->use_nnsp_true && temp_constraints) {
1654       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1655 
1656 #if defined(PETSC_MISSING_LAPACK_GESVD)
1657       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1658          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1659          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1660             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1661             from that computed using LAPACKgesvd
1662          -> This is due to a different computation of eigenvectors in LAPACKheev
1663          -> The quality of the POD-computed basis will be the same */
1664       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1665       /* Store upper triangular part of correlation matrix */
1666       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1667       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1668       for (j=0;j<temp_constraints;j++) {
1669         for (k=0;k<j+1;k++) {
1670           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1671         }
1672       }
1673       /* compute eigenvalues and eigenvectors of correlation matrix */
1674       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1675       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1676 #if !defined(PETSC_USE_COMPLEX)
1677       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1678 #else
1679       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1680 #endif
1681       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1682       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1683       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1684       j=0;
1685       while (j < temp_constraints && singular_vals[j] < tol) j++;
1686       total_counts=total_counts-j;
1687       valid_constraints = temp_constraints-j;
1688       /* scale and copy POD basis into used quadrature memory */
1689       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1690       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1691       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1692       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1693       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1694       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1695       if (j<temp_constraints) {
1696         PetscInt ii;
1697         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1698         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1699         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1700         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1701         for (k=0;k<temp_constraints-j;k++) {
1702           for (ii=0;ii<size_of_constraint;ii++) {
1703             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1704           }
1705         }
1706       }
1707 #else  /* on missing GESVD */
1708       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1709       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1710       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1711       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1712 #if !defined(PETSC_USE_COMPLEX)
1713       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1714 #else
1715       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1716 #endif
1717       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1718       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1719       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1720       k = temp_constraints;
1721       if (k > size_of_constraint) k = size_of_constraint;
1722       j = 0;
1723       while (j < k && singular_vals[k-j-1] < tol) j++;
1724       total_counts = total_counts-temp_constraints+k-j;
1725       valid_constraints = k-j;
1726 #endif /* on missing GESVD */
1727     }
1728     /* setting change_of_basis flag is safe now */
1729     if (boolforchange) {
1730       for (j=0;j<valid_constraints;j++) {
1731         PetscBTSet(change_basis,total_counts-j-1);
1732       }
1733     }
1734   }
1735   /* free index sets of faces, edges and vertices */
1736   for (i=0;i<n_ISForFaces;i++) {
1737     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1738   }
1739   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1740   for (i=0;i<n_ISForEdges;i++) {
1741     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1742   }
1743   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1744   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1745   /* map temp_indices_to_constraint in boundary numbering */
1746   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1747   if (i != temp_indices[total_counts]) {
1748     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1749   }
1750 
1751   /* free workspace */
1752   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1753     ierr = PetscFree(work);CHKERRQ(ierr);
1754 #if defined(PETSC_USE_COMPLEX)
1755     ierr = PetscFree(rwork);CHKERRQ(ierr);
1756 #endif
1757     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1758 #if defined(PETSC_MISSING_LAPACK_GESVD)
1759     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1760     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1761 #endif
1762   }
1763   for (k=0;k<nnsp_size;k++) {
1764     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1765   }
1766   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1767 
1768   /* set quantities in pcbddc data structure and store previous primal size */
1769   /* n_vertices defines the number of subdomain corners in the primal space */
1770   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1771   olocal_primal_size = pcbddc->local_primal_size;
1772   pcbddc->local_primal_size = total_counts;
1773   pcbddc->n_vertices = n_vertices;
1774   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1775 
1776   /* Create constraint matrix */
1777   /* The constraint matrix is used to compute the l2g map of primal dofs */
1778   /* so we need to set it up properly either with or without change of basis */
1779   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1780   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1781   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1782   /* array to compute a local numbering of constraints : vertices first then constraints */
1783   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1784   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1785   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1786   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1787   /* auxiliary stuff for basis change */
1788   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1789   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1790 
1791   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1792   total_primal_vertices=0;
1793   for (i=0;i<pcbddc->local_primal_size;i++) {
1794     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1795     if (size_of_constraint == 1) {
1796       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1797       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1798       aux_primal_minloc[total_primal_vertices]=0;
1799       total_primal_vertices++;
1800     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1801       PetscInt min_loc,min_index;
1802       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1803       /* find first untouched local node */
1804       k = 0;
1805       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1806       min_index = global_indices[k];
1807       min_loc = k;
1808       /* search the minimum among global nodes already untouched on the cc */
1809       for (k=1;k<size_of_constraint;k++) {
1810         /* there can be more than one constraint on a single connected component */
1811         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1812           min_index = global_indices[k];
1813           min_loc = k;
1814         }
1815       }
1816       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1817       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1818       aux_primal_minloc[total_primal_vertices]=min_loc;
1819       total_primal_vertices++;
1820     }
1821   }
1822   /* determine if a QR strategy is needed for change of basis */
1823   qr_needed = PETSC_FALSE;
1824   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1825   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1826     if (PetscBTLookup(change_basis,i)) {
1827       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1828       j = 0;
1829       for (k=0;k<size_of_constraint;k++) {
1830         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1831           j++;
1832         }
1833       }
1834       /* found more than one primal dof on the cc */
1835       if (j > 1) {
1836         PetscBTSet(qr_needed_idx,i);
1837         qr_needed = PETSC_TRUE;
1838       }
1839     }
1840   }
1841   /* free workspace */
1842   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1843 
1844   /* permute indices in order to have a sorted set of vertices */
1845   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1846 
1847   /* nonzero structure of constraint matrix */
1848   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1849   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1850   j=total_primal_vertices;
1851   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1852     if (!PetscBTLookup(change_basis,i)) {
1853       nnz[j]=temp_indices[i+1]-temp_indices[i];
1854       j++;
1855     }
1856   }
1857   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1858   ierr = PetscFree(nnz);CHKERRQ(ierr);
1859   /* set values in constraint matrix */
1860   for (i=0;i<total_primal_vertices;i++) {
1861     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1862   }
1863   total_counts = total_primal_vertices;
1864   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1865     if (!PetscBTLookup(change_basis,i)) {
1866       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1867       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1868       total_counts++;
1869     }
1870   }
1871   /* assembling */
1872   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1873   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1874   /*
1875   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1876   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1877   */
1878   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1879   if (pcbddc->use_change_of_basis) {
1880     /* dual and primal dofs on a single cc */
1881     PetscInt     dual_dofs,primal_dofs;
1882     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1883     PetscInt     primal_counter;
1884     /* working stuff for GEQRF */
1885     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1886     PetscBLASInt lqr_work;
1887     /* working stuff for UNGQR */
1888     PetscScalar  *gqr_work,lgqr_work_t;
1889     PetscBLASInt lgqr_work;
1890     /* working stuff for TRTRS */
1891     PetscScalar  *trs_rhs;
1892     PetscBLASInt Blas_NRHS;
1893     /* pointers for values insertion into change of basis matrix */
1894     PetscInt     *start_rows,*start_cols;
1895     PetscScalar  *start_vals;
1896     /* working stuff for values insertion */
1897     PetscBT      is_primal;
1898 
1899     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1900     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1901     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1902     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1903     /* work arrays */
1904     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1905     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1906     /* nonzeros per row */
1907     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1908       if (PetscBTLookup(change_basis,i)) {
1909         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1910         if (PetscBTLookup(qr_needed_idx,i)) {
1911           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1912         } else {
1913           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1914           /* get local primal index on the cc */
1915           j = 0;
1916           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1917           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1918         }
1919       }
1920     }
1921     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1922     ierr = PetscFree(nnz);CHKERRQ(ierr);
1923     /* Set initial identity in the matrix */
1924     for (i=0;i<pcis->n_B;i++) {
1925       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1926     }
1927 
1928     if (pcbddc->dbg_flag) {
1929       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1930       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1931     }
1932 
1933 
1934     /* Now we loop on the constraints which need a change of basis */
1935     /*
1936        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1937        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1938 
1939        Basic blocks of change of basis matrix T computed
1940 
1941           - Using the following block transformation if there is only a primal dof on the cc
1942             (in the example, primal dof is the last one of the edge in LOCAL ordering
1943              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1944             | 1        0   ...        0     1 |
1945             | 0        1   ...        0     1 |
1946             |              ...                |
1947             | 0        ...            1     1 |
1948             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1949 
1950           - via QR decomposition of constraints otherwise
1951     */
1952     if (qr_needed) {
1953       /* space to store Q */
1954       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1955       /* first we issue queries for optimal work */
1956       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1957       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1958       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1959       lqr_work = -1;
1960       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1961       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1962       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1963       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1964       lgqr_work = -1;
1965       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1966       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1967       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1968       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1969       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1970       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1971       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1972       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1973       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1974       /* array to store scaling factors for reflectors */
1975       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1976       /* array to store rhs and solution of triangular solver */
1977       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1978       /* allocating workspace for check */
1979       if (pcbddc->dbg_flag) {
1980         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1981       }
1982     }
1983     /* array to store whether a node is primal or not */
1984     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1985     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1986     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1987     if (i != total_primal_vertices) {
1988       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1989     }
1990     for (i=0;i<total_primal_vertices;i++) {
1991       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1992     }
1993     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1994 
1995     /* loop on constraints and see whether or not they need a change of basis and compute it */
1996     /* -> using implicit ordering contained in temp_indices data */
1997     total_counts = pcbddc->n_vertices;
1998     primal_counter = total_counts;
1999     while (total_counts<pcbddc->local_primal_size) {
2000       primal_dofs = 1;
2001       if (PetscBTLookup(change_basis,total_counts)) {
2002         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2003         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2004           primal_dofs++;
2005         }
2006         /* get constraint info */
2007         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2008         dual_dofs = size_of_constraint-primal_dofs;
2009 
2010         if (pcbddc->dbg_flag) {
2011           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2012         }
2013 
2014         if (primal_dofs > 1) { /* QR */
2015 
2016           /* copy quadrature constraints for change of basis check */
2017           if (pcbddc->dbg_flag) {
2018             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2019           }
2020           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2021           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2022 
2023           /* compute QR decomposition of constraints */
2024           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2025           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2026           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2027           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2028           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2029           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2030           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2031 
2032           /* explictly compute R^-T */
2033           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2034           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2035           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2036           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2037           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2038           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2039           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2040           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2041           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2042           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2043 
2044           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2045           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2046           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2047           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2048           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2049           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2050           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2051           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2052           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2053 
2054           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2055              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2056              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2057           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2058           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2059           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2060           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2061           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2062           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2063           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2064           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2065           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2066           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2067 
2068           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2069           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2070           /* insert cols for primal dofs */
2071           for (j=0;j<primal_dofs;j++) {
2072             start_vals = &qr_basis[j*size_of_constraint];
2073             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2074             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2075           }
2076           /* insert cols for dual dofs */
2077           for (j=0,k=0;j<dual_dofs;k++) {
2078             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2079               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2080               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2081               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2082               j++;
2083             }
2084           }
2085 
2086           /* check change of basis */
2087           if (pcbddc->dbg_flag) {
2088             PetscInt   ii,jj;
2089             PetscBool valid_qr=PETSC_TRUE;
2090             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2091             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2092             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2093             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2094             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2095             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2096             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2097             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2098             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2099             for (jj=0;jj<size_of_constraint;jj++) {
2100               for (ii=0;ii<primal_dofs;ii++) {
2101                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2102                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2103               }
2104             }
2105             if (!valid_qr) {
2106               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2107               for (jj=0;jj<size_of_constraint;jj++) {
2108                 for (ii=0;ii<primal_dofs;ii++) {
2109                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2110                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2111                   }
2112                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2113                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2114                   }
2115                 }
2116               }
2117             } else {
2118               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2119             }
2120           }
2121         } else { /* simple transformation block */
2122           PetscInt row,col;
2123           PetscScalar val;
2124           for (j=0;j<size_of_constraint;j++) {
2125             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2126             if (!PetscBTLookup(is_primal,row)) {
2127               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2128               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2129               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2130             } else {
2131               for (k=0;k<size_of_constraint;k++) {
2132                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2133                 if (row != col) {
2134                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2135                 } else {
2136                   val = 1.0;
2137                 }
2138                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2139               }
2140             }
2141           }
2142           if (pcbddc->dbg_flag) {
2143             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2144           }
2145         }
2146         /* increment primal counter */
2147         primal_counter += primal_dofs;
2148       } else {
2149         if (pcbddc->dbg_flag) {
2150           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2151         }
2152       }
2153       /* increment constraint counter total_counts */
2154       total_counts += primal_dofs;
2155     }
2156 
2157     /* free workspace */
2158     if (qr_needed) {
2159       if (pcbddc->dbg_flag) {
2160         ierr = PetscFree(work);CHKERRQ(ierr);
2161       }
2162       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2163       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2164       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2165       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2166       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2167     }
2168     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2169     /* assembling */
2170     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2171     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2172     /*
2173     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2174     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2175     */
2176   }
2177 
2178   /* get indices in local ordering for vertices and constraints */
2179   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2180     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2181     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2182   }
2183   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2184   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2185   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2186   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2187   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2188   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2189   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2190   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2191   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2192   /* set quantities in PCBDDC data struct */
2193   pcbddc->n_actual_vertices = i;
2194   /* check if a new primal space has been introduced */
2195   pcbddc->new_primal_space_local = PETSC_TRUE;
2196   if (olocal_primal_size == pcbddc->local_primal_size) {
2197     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2198     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2199     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2200   }
2201   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2202   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2203 
2204   /* flush dbg viewer */
2205   if (pcbddc->dbg_flag) {
2206     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2207   }
2208 
2209   /* free workspace */
2210   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2211   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2212   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2213   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2214   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2215   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2216   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2217   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2223 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2224 {
2225   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2226   PC_IS       *pcis = (PC_IS*)pc->data;
2227   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2228   PetscInt    ierr,i,vertex_size;
2229   PetscViewer viewer=pcbddc->dbg_viewer;
2230 
2231   PetscFunctionBegin;
2232   /* Reset previously computed graph */
2233   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2234   /* Init local Graph struct */
2235   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2236 
2237   /* Check validity of the csr graph passed in by the user */
2238   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2239     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2240   }
2241 
2242   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2243   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2244     Mat mat_adj;
2245     const PetscInt *xadj,*adjncy;
2246     PetscBool flg_row=PETSC_TRUE;
2247 
2248     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2249     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2250     if (!flg_row) {
2251       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2252     }
2253     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2254     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2255     if (!flg_row) {
2256       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2257     }
2258     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2259   }
2260 
2261   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2262   vertex_size = 1;
2263   if (pcbddc->user_provided_isfordofs) {
2264     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2265       ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2266       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2267         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2268         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2269       }
2270       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2271       pcbddc->n_ISForDofs = 0;
2272       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2273     }
2274     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2275     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2276   } else {
2277     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2278       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2279       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2280       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2281         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2282       }
2283     }
2284   }
2285 
2286   /* Setup of Graph */
2287   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2288     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2289   }
2290   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2291     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2292   }
2293   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2294 
2295   /* Graph's connected components analysis */
2296   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2297 
2298   /* print some info to stdout */
2299   if (pcbddc->dbg_flag) {
2300     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2301   }
2302 
2303   /* mark topography has done */
2304   pcbddc->recompute_topography = PETSC_FALSE;
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2310 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2311 {
2312   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2313   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   n = 0;
2318   vertices = 0;
2319   if (pcbddc->ConstraintMatrix) {
2320     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2321     for (i=0;i<local_primal_size;i++) {
2322       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2323       if (size_of_constraint == 1) n++;
2324       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2325     }
2326     if (vertices_idx) {
2327       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2328       n = 0;
2329       for (i=0;i<local_primal_size;i++) {
2330         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2331         if (size_of_constraint == 1) {
2332           vertices[n++]=row_cmat_indices[0];
2333         }
2334         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2335       }
2336     }
2337   }
2338   *n_vertices = n;
2339   if (vertices_idx) *vertices_idx = vertices;
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2345 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2346 {
2347   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2348   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2349   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2350   PetscBT        touched;
2351   PetscErrorCode ierr;
2352 
2353     /* This function assumes that the number of local constraints per connected component
2354        is not greater than the number of nodes defined for the connected component
2355        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2356   PetscFunctionBegin;
2357   n = 0;
2358   constraints_index = 0;
2359   if (pcbddc->ConstraintMatrix) {
2360     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2361     max_size_of_constraint = 0;
2362     for (i=0;i<local_primal_size;i++) {
2363       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2364       if (size_of_constraint > 1) {
2365         n++;
2366       }
2367       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2368       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2369     }
2370     if (constraints_idx) {
2371       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2372       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2373       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2374       n = 0;
2375       for (i=0;i<local_primal_size;i++) {
2376         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2377         if (size_of_constraint > 1) {
2378           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2379           /* find first untouched local node */
2380           j = 0;
2381           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2382           min_index = row_cmat_global_indices[j];
2383           min_loc = j;
2384           /* search the minimum among nodes not yet touched on the connected component
2385              since there can be more than one constraint on a single cc */
2386           for (j=1;j<size_of_constraint;j++) {
2387             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2388               min_index = row_cmat_global_indices[j];
2389               min_loc = j;
2390             }
2391           }
2392           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2393           constraints_index[n++] = row_cmat_indices[min_loc];
2394         }
2395         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2396       }
2397       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2398       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2399     }
2400   }
2401   *n_constraints = n;
2402   if (constraints_idx) *constraints_idx = constraints_index;
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 /* the next two functions has been adapted from pcis.c */
2407 #undef __FUNCT__
2408 #define __FUNCT__ "PCBDDCApplySchur"
2409 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2410 {
2411   PetscErrorCode ierr;
2412   PC_IS          *pcis = (PC_IS*)(pc->data);
2413 
2414   PetscFunctionBegin;
2415   if (!vec2_B) { vec2_B = v; }
2416   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2417   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2418   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2419   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2420   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2426 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2427 {
2428   PetscErrorCode ierr;
2429   PC_IS          *pcis = (PC_IS*)(pc->data);
2430 
2431   PetscFunctionBegin;
2432   if (!vec2_B) { vec2_B = v; }
2433   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2434   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2435   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2436   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2437   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "PCBDDCSubsetNumbering"
2443 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2444 {
2445   Vec            local_vec,global_vec;
2446   IS             seqis,paris;
2447   VecScatter     scatter_ctx;
2448   PetscScalar    *array;
2449   PetscInt       *temp_global_dofs;
2450   PetscScalar    globalsum;
2451   PetscInt       i,j,s;
2452   PetscInt       nlocals,first_index,old_index,max_local;
2453   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2454   PetscMPIInt    *dof_sizes,*dof_displs;
2455   PetscBool      first_found;
2456   PetscErrorCode ierr;
2457 
2458   PetscFunctionBegin;
2459   /* mpi buffers */
2460   MPI_Comm_size(comm,&size_prec_comm);
2461   MPI_Comm_rank(comm,&rank_prec_comm);
2462   j = ( !rank_prec_comm ? size_prec_comm : 0);
2463   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2464   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2465   /* get maximum size of subset */
2466   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2467   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2468   max_local = 0;
2469   if (n_local_dofs) {
2470     max_local = temp_global_dofs[0];
2471     for (i=1;i<n_local_dofs;i++) {
2472       if (max_local < temp_global_dofs[i] ) {
2473         max_local = temp_global_dofs[i];
2474       }
2475     }
2476   }
2477   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2478   max_global++;
2479   max_local = 0;
2480   if (n_local_dofs) {
2481     max_local = local_dofs[0];
2482     for (i=1;i<n_local_dofs;i++) {
2483       if (max_local < local_dofs[i] ) {
2484         max_local = local_dofs[i];
2485       }
2486     }
2487   }
2488   max_local++;
2489   /* allocate workspace */
2490   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2491   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2492   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2493   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2494   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2495   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2496   /* create scatter */
2497   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2498   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2499   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2500   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2501   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2502   /* init array */
2503   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2504   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2505   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2506   if (local_dofs_mult) {
2507     for (i=0;i<n_local_dofs;i++) {
2508       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2509     }
2510   } else {
2511     for (i=0;i<n_local_dofs;i++) {
2512       array[local_dofs[i]]=1.0;
2513     }
2514   }
2515   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2516   /* scatter into global vec and get total number of global dofs */
2517   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2518   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2519   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2520   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2521   /* Fill global_vec with cumulative function for global numbering */
2522   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2523   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2524   nlocals = 0;
2525   first_index = -1;
2526   first_found = PETSC_FALSE;
2527   for (i=0;i<s;i++) {
2528     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2529       first_found = PETSC_TRUE;
2530       first_index = i;
2531     }
2532     nlocals += (PetscInt)PetscRealPart(array[i]);
2533   }
2534   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2535   if (!rank_prec_comm) {
2536     dof_displs[0]=0;
2537     for (i=1;i<size_prec_comm;i++) {
2538       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2539     }
2540   }
2541   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2542   if (first_found) {
2543     array[first_index] += (PetscScalar)nlocals;
2544     old_index = first_index;
2545     for (i=first_index+1;i<s;i++) {
2546       if (PetscRealPart(array[i]) > 0.0) {
2547         array[i] += array[old_index];
2548         old_index = i;
2549       }
2550     }
2551   }
2552   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2553   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2554   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2555   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2556   /* get global ordering of local dofs */
2557   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2558   if (local_dofs_mult) {
2559     for (i=0;i<n_local_dofs;i++) {
2560       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2561     }
2562   } else {
2563     for (i=0;i<n_local_dofs;i++) {
2564       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2565     }
2566   }
2567   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2568   /* free workspace */
2569   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2570   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2571   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2572   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2573   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2574   /* return pointer to global ordering of local dofs */
2575   *global_numbering_subset = temp_global_dofs;
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 #undef __FUNCT__
2580 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2581 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2582 {
2583   PetscInt       i,j;
2584   PetscScalar    *alphas;
2585   PetscErrorCode ierr;
2586 
2587   PetscFunctionBegin;
2588   /* this implements stabilized Gram-Schmidt */
2589   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2590   for (i=0;i<n;i++) {
2591     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2592     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2593     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2594   }
2595   ierr = PetscFree(alphas);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 /* TODO
2600    - now preallocation is done assuming SEQDENSE local matrices
2601 */
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatISGetMPIXAIJ"
2604 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2605 {
2606   Mat                    new_mat;
2607   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2608   /* info on mat */
2609   /* ISLocalToGlobalMapping rmapping,cmapping; */
2610   PetscInt               bs,rows,cols;
2611   PetscInt               lrows,lcols;
2612   PetscInt               local_rows,local_cols;
2613   PetscBool              isdense;
2614   /* values insertion */
2615   PetscScalar            *array;
2616   PetscInt               *local_indices,*global_indices;
2617   /* work */
2618   PetscInt               i,j,index_row;
2619   PetscErrorCode         ierr;
2620 
2621   PetscFunctionBegin;
2622   /* MISSING CHECKS
2623     - rectangular case not covered (it is not allowed by MATIS)
2624   */
2625   /* get info from mat */
2626   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2627   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2628   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2629   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2630 
2631   /* work */
2632   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2633   for (i=0;i<local_rows;i++) local_indices[i]=i;
2634   /* map indices of local mat to global values */
2635   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2636   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2637   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2638 
2639   if (reuse==MAT_INITIAL_MATRIX) {
2640     Vec         vec_dnz,vec_onz;
2641     PetscScalar *my_dnz,*my_onz;
2642     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2643     PetscInt    index_col,owner;
2644     PetscMPIInt nsubdomains;
2645 
2646     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2647     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2648     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2649     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2650     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2651     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2652     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2653 
2654     /*
2655       preallocation
2656     */
2657 
2658     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2659     /*
2660        Some vectors are needed to sum up properly on shared interface dofs.
2661        Preallocation macros cannot do the job.
2662        Note that preallocation is not exact, since it overestimates nonzeros
2663     */
2664     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2665     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2666     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2667     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2668     /* All processes need to compute entire row ownership */
2669     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2670     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2671     for (i=0;i<nsubdomains;i++) {
2672       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2673         row_ownership[j]=i;
2674       }
2675     }
2676 
2677     /*
2678        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2679        then, they will be summed up properly. This way, preallocation is always sufficient
2680     */
2681     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2682     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2683     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2684     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2685     for (i=0;i<local_rows;i++) {
2686       index_row = global_indices[i];
2687       for (j=i;j<local_rows;j++) {
2688         owner = row_ownership[index_row];
2689         index_col = global_indices[j];
2690         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2691           my_dnz[i] += 1.0;
2692         } else { /* offdiag block */
2693           my_onz[i] += 1.0;
2694         }
2695         /* same as before, interchanging rows and cols */
2696         if (i != j) {
2697           owner = row_ownership[index_col];
2698           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2699             my_dnz[j] += 1.0;
2700           } else {
2701             my_onz[j] += 1.0;
2702           }
2703         }
2704       }
2705     }
2706     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2707     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2708     if (local_rows) { /* multilevel guard */
2709       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2710       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2711     }
2712     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2713     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2714     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2715     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2716     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2717     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2718     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2719 
2720     /* set computed preallocation in dnz and onz */
2721     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2722     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2723     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2724     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2725     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2726     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2727     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2728     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2729 
2730     /* Resize preallocation if overestimated */
2731     for (i=0;i<lrows;i++) {
2732       dnz[i] = PetscMin(dnz[i],lcols);
2733       onz[i] = PetscMin(onz[i],cols-lcols);
2734     }
2735     /* set preallocation */
2736     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2737     for (i=0;i<lrows/bs;i++) {
2738       dnz[i] = dnz[i*bs]/bs;
2739       onz[i] = onz[i*bs]/bs;
2740     }
2741     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2742     for (i=0;i<lrows/bs;i++) {
2743       dnz[i] = dnz[i]-i;
2744     }
2745     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2746     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2747     *M = new_mat;
2748   } else {
2749     PetscInt mbs,mrows,mcols;
2750     /* some checks */
2751     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2752     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2753     if (mrows != rows) {
2754       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2755     }
2756     if (mrows != rows) {
2757       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2758     }
2759     if (mbs != bs) {
2760       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2761     }
2762     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2763   }
2764   /* set local to global mappings */
2765   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2766   /* Set values */
2767   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2768   if (isdense) { /* special case for dense local matrices */
2769     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2770     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2771     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2772     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2773     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2774     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2775   } else { /* very basic values insertion for all other matrix types */
2776     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2777     for (i=0;i<local_rows;i++) {
2778       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2779       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2780       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2781       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2782       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2783       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2784     }
2785     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2786   }
2787   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2788   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2789   if (isdense) {
2790     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2791   }
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatISGetSubassemblingPattern"
2797 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2798 {
2799   Mat             subdomain_adj;
2800   IS              new_ranks,ranks_send_to;
2801   MatPartitioning partitioner;
2802   Mat_IS          *matis;
2803   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2804   PetscInt        prank;
2805   PetscMPIInt     size,rank,color;
2806   PetscInt        *xadj,*adjncy,*oldranks;
2807   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2808   PetscInt        i,j,local_size,threshold=0;
2809   PetscErrorCode  ierr;
2810   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2811   PetscSubcomm    subcomm;
2812 
2813   PetscFunctionBegin;
2814   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2815   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2816   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2817 
2818   /* Get info on mapping */
2819   matis = (Mat_IS*)(mat->data);
2820   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2821   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2822 
2823   /* build local CSR graph of subdomains' connectivity */
2824   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2825   xadj[0] = 0;
2826   xadj[1] = PetscMax(n_neighs-1,0);
2827   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2828   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2829 
2830   if (threshold) {
2831     PetscInt* count,min_threshold;
2832     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2833     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2834     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2835       for (j=0;j<n_shared[i];j++) {
2836         count[shared[i][j]] += 1;
2837       }
2838     }
2839     /* adapt threshold since we dont want to lose significant connections */
2840     min_threshold = n_neighs;
2841     for (i=1;i<n_neighs;i++) {
2842       for (j=0;j<n_shared[i];j++) {
2843         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2844       }
2845     }
2846     threshold = PetscMax(min_threshold+1,threshold);
2847     xadj[1] = 0;
2848     for (i=1;i<n_neighs;i++) {
2849       for (j=0;j<n_shared[i];j++) {
2850         if (count[shared[i][j]] < threshold) {
2851           adjncy[xadj[1]] = neighs[i];
2852           adjncy_wgt[xadj[1]] = n_shared[i];
2853           xadj[1]++;
2854           break;
2855         }
2856       }
2857     }
2858     ierr = PetscFree(count);CHKERRQ(ierr);
2859   } else {
2860     if (xadj[1]) {
2861       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2862       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2863     }
2864   }
2865   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2866   if (use_square) {
2867     for (i=0;i<xadj[1];i++) {
2868       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2869     }
2870   }
2871   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2872 
2873   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2874 
2875   /*
2876     Restrict work on active processes only.
2877   */
2878   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2879   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2880   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2881   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2882   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2883   if (color) {
2884     ierr = PetscFree(xadj);CHKERRQ(ierr);
2885     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2886     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2887   } else {
2888     PetscInt coarsening_ratio;
2889     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2890     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2891     prank = rank;
2892     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2893     /*
2894     for (i=0;i<size;i++) {
2895       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2896     }
2897     */
2898     for (i=0;i<xadj[1];i++) {
2899       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2900     }
2901     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2902     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2903     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2904 
2905     /* Partition */
2906     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2907     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2908     if (use_vwgt) {
2909       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2910       v_wgt[0] = local_size;
2911       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2912     }
2913     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2914     coarsening_ratio = size/n_subdomains;
2915     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2916     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2917     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2918     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2919     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2920 
2921     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2922     if (contiguous) {
2923       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2924     } else {
2925       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2926     }
2927     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2928     /* clean up */
2929     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2930     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2931     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2932     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2933   }
2934   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2935 
2936   /* assemble parallel IS for sends */
2937   i = 1;
2938   if (color) i=0;
2939   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2940 
2941   /* get back IS */
2942   *is_sends = ranks_send_to;
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2947 
2948 #undef __FUNCT__
2949 #define __FUNCT__ "MatISSubassemble"
2950 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2951 {
2952   Mat                    local_mat;
2953   Mat_IS                 *matis;
2954   IS                     is_sends_internal;
2955   PetscInt               rows,cols;
2956   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2957   PetscBool              ismatis,isdense,destroy_mat;
2958   ISLocalToGlobalMapping l2gmap;
2959   PetscInt*              l2gmap_indices;
2960   const PetscInt*        is_indices;
2961   MatType                new_local_type;
2962   /* buffers */
2963   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2964   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2965   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2966   /* MPI */
2967   MPI_Comm               comm,comm_n;
2968   PetscSubcomm           subcomm;
2969   PetscMPIInt            n_sends,n_recvs,commsize;
2970   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2971   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2972   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2973   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2974   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2975   PetscErrorCode         ierr;
2976 
2977   PetscFunctionBegin;
2978   /* TODO: add missing checks */
2979   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2980   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2981   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2982   PetscValidLogicalCollectiveInt(mat,nis,7);
2983   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2984   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2985   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2986   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2987   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2988   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2989   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2990   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2991     PetscInt mrows,mcols,mnrows,mncols;
2992     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2993     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2994     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2995     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2996     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2997     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2998   }
2999   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3000   PetscValidLogicalCollectiveInt(mat,bs,0);
3001   /* prepare IS for sending if not provided */
3002   if (!is_sends) {
3003     PetscBool pcontig = PETSC_TRUE;
3004     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3005     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3006   } else {
3007     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3008     is_sends_internal = is_sends;
3009   }
3010 
3011   /* get pointer of MATIS data */
3012   matis = (Mat_IS*)mat->data;
3013 
3014   /* get comm */
3015   comm = PetscObjectComm((PetscObject)mat);
3016 
3017   /* compute number of sends */
3018   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3019   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3020 
3021   /* compute number of receives */
3022   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3023   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
3024   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3025   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3026   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3027   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3028   ierr = PetscFree(iflags);CHKERRQ(ierr);
3029 
3030   /* restrict comm if requested */
3031   subcomm = 0;
3032   destroy_mat = PETSC_FALSE;
3033   if (restrict_comm) {
3034     PetscMPIInt color,rank,subcommsize;
3035     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3036     color = 0;
3037     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3038     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3039     subcommsize = commsize - subcommsize;
3040     /* check if reuse has been requested */
3041     if (reuse == MAT_REUSE_MATRIX) {
3042       if (*mat_n) {
3043         PetscMPIInt subcommsize2;
3044         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3045         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3046         comm_n = PetscObjectComm((PetscObject)*mat_n);
3047       } else {
3048         comm_n = PETSC_COMM_SELF;
3049       }
3050     } else { /* MAT_INITIAL_MATRIX */
3051       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3052       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3053       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3054       comm_n = subcomm->comm;
3055     }
3056     /* flag to destroy *mat_n if not significative */
3057     if (color) destroy_mat = PETSC_TRUE;
3058   } else {
3059     comm_n = comm;
3060   }
3061 
3062   /* prepare send/receive buffers */
3063   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
3064   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3065   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
3066   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3067   if (nis) {
3068     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3069     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3070   }
3071 
3072   /* Get data from local matrices */
3073   if (!isdense) {
3074     /* TODO: See below some guidelines on how to prepare the local buffers */
3075     /*
3076        send_buffer_vals should contain the raw values of the local matrix
3077        send_buffer_idxs should contain:
3078        - MatType_PRIVATE type
3079        - PetscInt        size_of_l2gmap
3080        - PetscInt        global_row_indices[size_of_l2gmap]
3081        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3082     */
3083   } else {
3084     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3085     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3086     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
3087     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3088     send_buffer_idxs[1] = i;
3089     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3090     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3091     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3092     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3093     for (i=0;i<n_sends;i++) {
3094       ilengths_vals[is_indices[i]] = len*len;
3095       ilengths_idxs[is_indices[i]] = len+2;
3096     }
3097   }
3098   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3099   /* additional is (if any) */
3100   if (nis) {
3101     PetscMPIInt psum;
3102     PetscInt j;
3103     for (j=0,psum=0;j<nis;j++) {
3104       PetscInt plen;
3105       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3106       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3107       psum += len+1; /* indices + lenght */
3108     }
3109     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3110     for (j=0,psum=0;j<nis;j++) {
3111       PetscInt plen;
3112       const PetscInt *is_array_idxs;
3113       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3114       send_buffer_idxs_is[psum] = plen;
3115       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3116       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3117       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3118       psum += plen+1; /* indices + lenght */
3119     }
3120     for (i=0;i<n_sends;i++) {
3121       ilengths_idxs_is[is_indices[i]] = psum;
3122     }
3123     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3124   }
3125 
3126   buf_size_idxs = 0;
3127   buf_size_vals = 0;
3128   buf_size_idxs_is = 0;
3129   for (i=0;i<n_recvs;i++) {
3130     buf_size_idxs += (PetscInt)olengths_idxs[i];
3131     buf_size_vals += (PetscInt)olengths_vals[i];
3132     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3133   }
3134   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
3135   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
3136   ierr = PetscMalloc(buf_size_idxs_is*sizeof(PetscInt),&recv_buffer_idxs_is);CHKERRQ(ierr);
3137 
3138   /* get new tags for clean communications */
3139   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3140   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3141   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3142 
3143   /* allocate for requests */
3144   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
3145   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
3146   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs_is);CHKERRQ(ierr);
3147   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
3148   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
3149   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs_is);CHKERRQ(ierr);
3150 
3151   /* communications */
3152   ptr_idxs = recv_buffer_idxs;
3153   ptr_vals = recv_buffer_vals;
3154   ptr_idxs_is = recv_buffer_idxs_is;
3155   for (i=0;i<n_recvs;i++) {
3156     source_dest = onodes[i];
3157     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3158     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3159     ptr_idxs += olengths_idxs[i];
3160     ptr_vals += olengths_vals[i];
3161     if (nis) {
3162       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3163       ptr_idxs_is += olengths_idxs_is[i];
3164     }
3165   }
3166   for (i=0;i<n_sends;i++) {
3167     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3168     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3169     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3170     if (nis) {
3171       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3172     }
3173   }
3174   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3175   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3176 
3177   /* assemble new l2g map */
3178   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3179   ptr_idxs = recv_buffer_idxs;
3180   buf_size_idxs = 0;
3181   for (i=0;i<n_recvs;i++) {
3182     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3183     ptr_idxs += olengths_idxs[i];
3184   }
3185   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
3186   ptr_idxs = recv_buffer_idxs;
3187   buf_size_idxs = 0;
3188   for (i=0;i<n_recvs;i++) {
3189     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3190     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3191     ptr_idxs += olengths_idxs[i];
3192   }
3193   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3194   ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3195   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3196 
3197   /* infer new local matrix type from received local matrices type */
3198   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3199   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3200   if (n_recvs) {
3201     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3202     ptr_idxs = recv_buffer_idxs;
3203     for (i=0;i<n_recvs;i++) {
3204       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3205         new_local_type_private = MATAIJ_PRIVATE;
3206         break;
3207       }
3208       ptr_idxs += olengths_idxs[i];
3209     }
3210     switch (new_local_type_private) {
3211       case MATDENSE_PRIVATE:
3212         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3213           new_local_type = MATSEQAIJ;
3214           bs = 1;
3215         } else { /* if I receive only 1 dense matrix */
3216           new_local_type = MATSEQDENSE;
3217           bs = 1;
3218         }
3219         break;
3220       case MATAIJ_PRIVATE:
3221         new_local_type = MATSEQAIJ;
3222         bs = 1;
3223         break;
3224       case MATBAIJ_PRIVATE:
3225         new_local_type = MATSEQBAIJ;
3226         break;
3227       case MATSBAIJ_PRIVATE:
3228         new_local_type = MATSEQSBAIJ;
3229         break;
3230       default:
3231         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3232         break;
3233     }
3234   } else { /* by default, new_local_type is seqdense */
3235     new_local_type = MATSEQDENSE;
3236     bs = 1;
3237   }
3238 
3239   /* create MATIS object if needed */
3240   if (reuse == MAT_INITIAL_MATRIX) {
3241     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3242     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3243   } else {
3244     /* it also destroys the local matrices */
3245     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3246   }
3247   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3248   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3249   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3250   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3251 
3252   /* set values */
3253   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3254   ptr_vals = recv_buffer_vals;
3255   ptr_idxs = recv_buffer_idxs;
3256   for (i=0;i<n_recvs;i++) {
3257     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3258       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3259       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3260       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3261       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3262       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3263     } else {
3264       /* TODO */
3265     }
3266     ptr_idxs += olengths_idxs[i];
3267     ptr_vals += olengths_vals[i];
3268   }
3269   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3270   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3271   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3272   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3273 
3274 #if 0
3275   if (!restrict_comm) { /* check */
3276     Vec       lvec,rvec;
3277     PetscReal infty_error;
3278 
3279     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3280     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3281     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3282     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3283     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3284     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3285     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3286     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3287     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3288   }
3289 #endif
3290 
3291   /* assemble new additional is (if any) */
3292   if (nis) {
3293     PetscInt **temp_idxs,*count_is,j,psum;
3294 
3295     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3296     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3297     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3298     ptr_idxs = recv_buffer_idxs_is;
3299     psum = 0;
3300     for (i=0;i<n_recvs;i++) {
3301       for (j=0;j<nis;j++) {
3302         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3303         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3304         psum += plen;
3305         ptr_idxs += plen+1; /* shift pointer to received data */
3306       }
3307     }
3308     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3309     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3310     for (i=1;i<nis;i++) {
3311       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3312     }
3313     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3314     ptr_idxs = recv_buffer_idxs_is;
3315     for (i=0;i<n_recvs;i++) {
3316       for (j=0;j<nis;j++) {
3317         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3318         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3319         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3320         ptr_idxs += plen+1; /* shift pointer to received data */
3321       }
3322     }
3323     for (i=0;i<nis;i++) {
3324       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3325       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3326       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3327     }
3328     ierr = PetscFree(count_is);CHKERRQ(ierr);
3329     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3330     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3331   }
3332   /* free workspace */
3333   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3334   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3335   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3336   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3337   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3338   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3339   if (isdense) {
3340     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3341     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3342   } else {
3343     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3344   }
3345   if (nis) {
3346     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3347     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3348   }
3349   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3350   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3351   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3352   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3353   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3354   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3355   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3356   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3357   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3358   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3359   ierr = PetscFree(onodes);CHKERRQ(ierr);
3360   if (nis) {
3361     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3362     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3363     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3364   }
3365   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3366   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3367     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3368     for (i=0;i<nis;i++) {
3369       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3370     }
3371   }
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 /* temporary hack into ksp private data structure */
3376 #include <petsc-private/kspimpl.h>
3377 
3378 #undef __FUNCT__
3379 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3380 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3381 {
3382   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3383   PC_IS                  *pcis = (PC_IS*)pc->data;
3384   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3385   MatNullSpace           CoarseNullSpace=NULL;
3386   ISLocalToGlobalMapping coarse_islg;
3387   IS                     coarse_is,*isarray;
3388   PetscInt               i,im_active=-1,active_procs=-1;
3389   PetscInt               nis,nisdofs,nisneu;
3390   PC                     pc_temp;
3391   PCType                 coarse_pc_type;
3392   KSPType                coarse_ksp_type;
3393   PetscBool              multilevel_requested,multilevel_allowed;
3394   PetscBool              setsym,issym,isherm,isbddc,isnn,coarse_reuse;
3395   MatStructure           matstruct;
3396   Mat                    t_coarse_mat_is;
3397   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3398   PetscMPIInt            all_procs;
3399   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3400   PetscBool              compute_vecs = PETSC_FALSE;
3401   PetscErrorCode         ierr;
3402 
3403   PetscFunctionBegin;
3404   /* Assign global numbering to coarse dofs */
3405   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3406     compute_vecs = PETSC_TRUE;
3407     PetscInt ocoarse_size;
3408     ocoarse_size = pcbddc->coarse_size;
3409     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3410     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3411     /* see if we can avoid some work */
3412     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3413       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3414         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3415         coarse_reuse = PETSC_FALSE;
3416       } else { /* we can safely reuse already computed coarse matrix */
3417         coarse_reuse = PETSC_TRUE;
3418       }
3419     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3420       coarse_reuse = PETSC_FALSE;
3421     }
3422     /* reset any subassembling information */
3423     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3424     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3425   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3426     coarse_reuse = PETSC_TRUE;
3427   }
3428 
3429   /* count "active" (i.e. with positive local size) and "void" processes */
3430   im_active = !!(pcis->n);
3431   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3432   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3433   void_procs = all_procs-active_procs;
3434   csin_type_simple = PETSC_TRUE;
3435   if (pcbddc->current_level) {
3436     csin_ml = PETSC_TRUE;
3437     ncoarse_ml = void_procs;
3438     csin_ds = PETSC_TRUE;
3439     ncoarse_ds = void_procs;
3440     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3441   } else {
3442     csin_ml = PETSC_FALSE;
3443     ncoarse_ml = all_procs;
3444     if (void_procs) {
3445       csin_ds = PETSC_TRUE;
3446       ncoarse_ds = void_procs;
3447       csin_type_simple = PETSC_FALSE;
3448     } else {
3449       csin_ds = PETSC_FALSE;
3450       ncoarse_ds = all_procs;
3451     }
3452   }
3453 
3454   /*
3455     test if we can go multilevel: three conditions must be satisfied:
3456     - we have not exceeded the number of levels requested
3457     - we can actually subassemble the active processes
3458     - we can find a suitable number of MPI processes where we can place the subassembled problem
3459   */
3460   multilevel_allowed = PETSC_FALSE;
3461   multilevel_requested = PETSC_FALSE;
3462   if (pcbddc->current_level < pcbddc->max_levels) {
3463     multilevel_requested = PETSC_TRUE;
3464     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3465       multilevel_allowed = PETSC_FALSE;
3466     } else {
3467       multilevel_allowed = PETSC_TRUE;
3468     }
3469   }
3470   /* determine number of process partecipating to coarse solver */
3471   if (multilevel_allowed) {
3472     ncoarse = ncoarse_ml;
3473     csin = csin_ml;
3474   } else {
3475     ncoarse = ncoarse_ds;
3476     csin = csin_ds;
3477   }
3478 
3479   /* creates temporary l2gmap and IS for coarse indexes */
3480   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3481   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3482 
3483   /* creates temporary MATIS object for coarse matrix */
3484   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3485   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3486   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3487   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3488   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3489   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3490 
3491   /* compute dofs splitting and neumann boundaries for coarse dofs */
3492   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3493     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3494     const PetscInt         *idxs;
3495     ISLocalToGlobalMapping tmap;
3496 
3497     /* create map between primal indices (in local representative ordering) and local primal numbering */
3498     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3499     /* allocate space for temporary storage */
3500     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3501     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3502     /* allocate for IS array */
3503     nisdofs = pcbddc->n_ISForDofsLocal;
3504     nisneu = !!pcbddc->NeumannBoundariesLocal;
3505     nis = nisdofs + nisneu;
3506     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3507     /* dofs splitting */
3508     for (i=0;i<nisdofs;i++) {
3509       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3510       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3511       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3512       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3513       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3514       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3515       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3516       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3517     }
3518     /* neumann boundaries */
3519     if (pcbddc->NeumannBoundariesLocal) {
3520       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3521       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3522       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3523       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3524       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3525       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3526       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3527       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3528     }
3529     /* free memory */
3530     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3531     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3532     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3533   } else {
3534     nis = 0;
3535     nisdofs = 0;
3536     nisneu = 0;
3537     isarray = NULL;
3538   }
3539   /* destroy no longer needed map */
3540   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3541 
3542   /* restrict on coarse candidates (if needed) */
3543   coarse_mat_is = NULL;
3544   if (csin) {
3545     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3546       PetscInt j,tissize,*nisindices;
3547       PetscInt *coarse_candidates;
3548       const PetscInt* tisindices;
3549       /* get coarse candidates' ranks in pc communicator */
3550       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3551       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3552       for (i=0,j=0;i<all_procs;i++) {
3553         if (!coarse_candidates[i]) {
3554           coarse_candidates[j]=i;
3555           j++;
3556         }
3557       }
3558       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3559       /* get a suitable subassembling pattern */
3560       if (csin_type_simple) {
3561         PetscMPIInt rank;
3562         PetscInt    issize,isidx;
3563         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3564         if (im_active) {
3565           issize = 1;
3566           isidx = (PetscInt)rank;
3567         } else {
3568           issize = 0;
3569           isidx = -1;
3570         }
3571         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3572       } else {
3573         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3574       }
3575       if (pcbddc->dbg_flag) {
3576         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3577         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3578         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3579         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3580         for (i=0;i<j;i++) {
3581           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3582         }
3583         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3584         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3585       }
3586       /* shift the pattern on coarse candidates */
3587       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3588       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3589       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3590       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3591       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3592       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3593       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3594     }
3595     if (pcbddc->dbg_flag) {
3596       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3597       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3598       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3599       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3600     }
3601     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3602     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3603   } else {
3604     if (pcbddc->dbg_flag) {
3605       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3606       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3607       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3608     }
3609     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3610     coarse_mat_is = t_coarse_mat_is;
3611   }
3612 
3613   /* create local to global scatters for coarse problem */
3614   if (compute_vecs) {
3615     PetscInt lrows;
3616     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3617     if (coarse_mat_is) {
3618       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3619     } else {
3620       lrows = 0;
3621     }
3622     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3623     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3624     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3625     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3626     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3627   }
3628   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3629   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3630 
3631   /* set defaults for coarse KSP and PC */
3632   if (multilevel_allowed) {
3633     coarse_ksp_type = KSPRICHARDSON;
3634     coarse_pc_type = PCBDDC;
3635   } else {
3636     coarse_ksp_type = KSPPREONLY;
3637     coarse_pc_type = PCREDUNDANT;
3638   }
3639 
3640   /* print some info if requested */
3641   if (pcbddc->dbg_flag) {
3642     if (!multilevel_allowed) {
3643       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3644       if (multilevel_requested) {
3645         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3646       } else if (pcbddc->max_levels) {
3647         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3648       }
3649       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3650     }
3651   }
3652 
3653   /* create the coarse KSP object only once with defaults */
3654   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3655   issym = PETSC_FALSE;
3656   isherm = PETSC_FALSE;
3657   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3658   ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr);
3659   if (coarse_mat_is) {
3660     MatReuse coarse_mat_reuse;
3661     PetscViewer dbg_viewer;
3662     if (pcbddc->dbg_flag) {
3663       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3664       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3665     }
3666     if (!pcbddc->coarse_ksp) {
3667       char prefix[256],str_level[16];
3668       size_t len;
3669       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3670       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3671       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3672       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr);
3673       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3674       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3675       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3676       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3677       /* prefix */
3678       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3679       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3680       if (!pcbddc->current_level) {
3681         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3682         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3683       } else {
3684         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3685         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3686         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3687         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3688         *(prefix+len)='\0';
3689         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3690         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3691       }
3692       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3693       /* allow user customization */
3694       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3695     }
3696 
3697     /* get some info after set from options */
3698     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3699     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3700     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3701     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3702       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3703       isbddc = PETSC_FALSE;
3704     }
3705 
3706     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3707     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3708     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3709     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3710     if (nisdofs) {
3711       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3712       for (i=0;i<nisdofs;i++) {
3713         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3714       }
3715     }
3716     if (nisneu) {
3717       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3718       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3719     }
3720 
3721     /* assemble coarse matrix */
3722     if (coarse_reuse) {
3723       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3724       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3725       coarse_mat_reuse = MAT_REUSE_MATRIX;
3726     } else {
3727       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3728     }
3729     if (isbddc || isnn) {
3730       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3731         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3732         if (pcbddc->dbg_flag) {
3733           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3734           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3735           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3736           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3737         }
3738       }
3739       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3740     } else {
3741       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3742     }
3743     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3744 
3745     /* propagate symmetry info to coarse matrix */
3746     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3747     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3748 
3749     /* set operators */
3750     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3751     if (pcbddc->dbg_flag) {
3752       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3753     }
3754   } else { /* processes non partecipating to coarse solver (if any) */
3755     coarse_mat = 0;
3756   }
3757   ierr = PetscFree(isarray);CHKERRQ(ierr);
3758 
3759   /* Compute coarse null space (special handling by BDDC only) */
3760   if (pcbddc->NullSpace) {
3761     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3762   }
3763 
3764   if (pcbddc->coarse_ksp) {
3765     Vec crhs,csol;
3766     PetscBool ispreonly;
3767     if (CoarseNullSpace) {
3768       if (isbddc) {
3769         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3770       } else {
3771         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3772       }
3773     }
3774     /* setup coarse ksp */
3775     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3776     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3777     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3778     /* hack */
3779     if (!csol) {
3780       ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3781     }
3782     if (!crhs) {
3783       ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3784     }
3785     /* Check coarse problem if in debug mode or if solving with an iterative method */
3786     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3787     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3788       KSP       check_ksp;
3789       KSPType   check_ksp_type;
3790       PC        check_pc;
3791       Vec       check_vec,coarse_vec;
3792       PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3793       PetscInt  its;
3794       PetscBool compute_eigs;
3795       PetscReal *eigs_r,*eigs_c;
3796       PetscInt  neigs;
3797 
3798       /* Create ksp object suitable for estimation of extreme eigenvalues */
3799       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3800       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3801       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3802       if (ispreonly) {
3803         check_ksp_type = KSPPREONLY;
3804         compute_eigs = PETSC_FALSE;
3805       } else {
3806         check_ksp_type = KSPGMRES;
3807         compute_eigs = PETSC_TRUE;
3808       }
3809       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3810       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3811       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3812       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3813       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3814       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3815       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3816       /* create random vec */
3817       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3818       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3819       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3820       if (CoarseNullSpace) {
3821         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3822       }
3823       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3824       /* solve coarse problem */
3825       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3826       if (CoarseNullSpace) {
3827         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3828       }
3829       /* set eigenvalue estimation if preonly has not been requested */
3830       if (compute_eigs) {
3831         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3832         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3833         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3834         lambda_max = eigs_r[neigs-1];
3835         lambda_min = eigs_r[0];
3836         if (pcbddc->use_coarse_estimates) {
3837           if (lambda_max>lambda_min) {
3838             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3839             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3840           }
3841         }
3842       }
3843 
3844       /* check coarse problem residual error */
3845       if (pcbddc->dbg_flag) {
3846         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3847         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3848         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3849         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3850         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3851         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3852         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3853         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3854         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3855         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3856         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3857         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3858         if (compute_eigs) {
3859           PetscReal lambda_max_s,lambda_min_s;
3860           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3861           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3862           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3863           for (i=0;i<neigs;i++) {
3864             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3865           }
3866         }
3867         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3868         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3869       }
3870       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3871       if (compute_eigs) {
3872         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3873         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3874       }
3875     }
3876   }
3877   /* print additional info */
3878   if (pcbddc->dbg_flag) {
3879     /* waits until all processes reaches this point */
3880     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3881     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3882     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3883   }
3884 
3885   /* free memory */
3886   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3887   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3888   PetscFunctionReturn(0);
3889 }
3890 
3891 #undef __FUNCT__
3892 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3893 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3894 {
3895   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3896   PC_IS*         pcis = (PC_IS*)pc->data;
3897   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3898   PetscInt       i,coarse_size;
3899   PetscInt       *local_primal_indices;
3900   PetscErrorCode ierr;
3901 
3902   PetscFunctionBegin;
3903   /* Compute global number of coarse dofs */
3904   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3905     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3906   }
3907   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3908 
3909   /* check numbering */
3910   if (pcbddc->dbg_flag) {
3911     PetscScalar coarsesum,*array;
3912 
3913     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3914     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3915     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3916     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3917     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3918     for (i=0;i<pcbddc->local_primal_size;i++) {
3919       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3920     }
3921     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3922     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3923     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3924     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3925     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3926     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3927     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3928     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3929     for (i=0;i<pcis->n;i++) {
3930       if (array[i] == 1.0) {
3931         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3932       }
3933     }
3934     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3935     for (i=0;i<pcis->n;i++) {
3936       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3937     }
3938     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3939     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3940     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3941     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3942     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3943     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3944     if (pcbddc->dbg_flag > 1) {
3945       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3946       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3947       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3948       for (i=0;i<pcbddc->local_primal_size;i++) {
3949         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3950       }
3951       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3952     }
3953     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3954   }
3955   /* get back data */
3956   *coarse_size_n = coarse_size;
3957   *local_primal_indices_n = local_primal_indices;
3958   PetscFunctionReturn(0);
3959 }
3960 
3961 #undef __FUNCT__
3962 #define __FUNCT__ "PCBDDCGlobalToLocal"
3963 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3964 {
3965   PC_IS*         pcis = (PC_IS*)pc->data;
3966   IS             localis_t;
3967   PetscInt       i,lsize,*idxs;
3968   PetscScalar    *vals;
3969   PetscErrorCode ierr;
3970 
3971   PetscFunctionBegin;
3972   /* get dirichlet indices in local ordering exploiting local to global map */
3973   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3974   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3975   for (i=0;i<lsize;i++) vals[i] = 1.0;
3976   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3977   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3978   if (idxs) { /* multilevel guard */
3979     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3980   }
3981   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3982   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3983   ierr = PetscFree(vals);CHKERRQ(ierr);
3984   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3985   /* now compute dirichlet set in local ordering */
3986   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3987   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3988   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3989   for (i=0,lsize=0;i<pcis->n;i++) {
3990     if (vals[i] == 1.0) {
3991       lsize++;
3992     }
3993   }
3994   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3995   for (i=0,lsize=0;i<pcis->n;i++) {
3996     if (vals[i] == 1.0) {
3997       idxs[lsize++] = i;
3998     }
3999   }
4000   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
4001   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4002   *localis = localis_t;
4003   PetscFunctionReturn(0);
4004 }
4005