xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 95ecbf38f54bf907855fa510dba2b327f79c8484)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
111   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118 {
119   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
120   PC_IS          *pcis = (PC_IS*)pc->data;
121   VecType        impVecType;
122   PetscInt       n_constraints,n_R,old_size;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   if (!pcbddc->ConstraintMatrix) {
127     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128   }
129   /* get sizes */
130   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
131   n_R = pcis->n-pcbddc->n_actual_vertices;
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   /* local work vectors (try to avoid unneeded work)*/
134   /* R nodes */
135   old_size = -1;
136   if (pcbddc->vec1_R) {
137     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
138   }
139   if (n_R != old_size) {
140     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
146   }
147   /* local primal dofs */
148   old_size = -1;
149   if (pcbddc->vec1_P) {
150     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
151   }
152   if (pcbddc->local_primal_size != old_size) {
153     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
156     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
157   }
158   /* local explicit constraints */
159   old_size = -1;
160   if (pcbddc->vec1_C) {
161     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
162   }
163   if (n_constraints && n_constraints != old_size) {
164     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
167     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCSetUpCorrection"
174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175 {
176   PetscErrorCode         ierr;
177   /* pointers to pcis and pcbddc */
178   PC_IS*                 pcis = (PC_IS*)pc->data;
179   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180   /* submatrices of local problem */
181   Mat                    A_RV,A_VR,A_VV;
182   /* working matrices */
183   Mat                    M1,M2,M3,C_CR;
184   /* working vectors */
185   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186   /* additional working stuff */
187   IS                     is_aux;
188   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
189   const PetscScalar      *array,*row_cmat_values;
190   const PetscInt         *row_cmat_indices,*idx_R_local;
191   PetscInt               *idx_V_B,*auxindices;
192   PetscInt               n_vertices,n_constraints,size_of_constraint;
193   PetscInt               i,j,n_R,n_D,n_B;
194   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
195   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
196   MatType                impMatType;
197   /* some shortcuts to scalars */
198   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
199   /* for debugging purposes */
200   PetscReal              *coarsefunctions_errors,*constraints_errors;
201 
202   PetscFunctionBegin;
203   /* get number of vertices (corners plus constraints with change of basis)
204      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
205   n_vertices = pcbddc->n_actual_vertices;
206   n_constraints = pcbddc->local_primal_size-n_vertices;
207   /* Set Non-overlapping dimensions */
208   n_B = pcis->n_B; n_D = pcis->n - n_B;
209   n_R = pcis->n-n_vertices;
210 
211   /* Set types for local objects needed by BDDC precondtioner */
212   impMatType = MATSEQDENSE;
213 
214   /* Allocating some extra storage just to be safe */
215   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
216   for (i=0;i<pcis->n;i++) auxindices[i]=i;
217 
218   /* vertices in boundary numbering */
219   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
220   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
221   if (i != n_vertices) {
222     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
223   }
224 
225   /* Precompute stuffs needed for preprocessing and application of BDDC*/
226   if (n_constraints) {
227     /* see if we can save some allocations */
228     if (pcbddc->local_auxmat2) {
229       PetscInt on_R,on_constraints;
230       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
231       if (on_R != n_R || on_constraints != n_constraints) {
232         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
233         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
234       }
235     }
236     /* work vectors */
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
239     /* auxiliary matrices */
240     if (!pcbddc->local_auxmat2) {
241       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
242       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
243       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
244       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
245     }
246 
247     /* Extract constraints on R nodes: C_{CR}  */
248     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
249     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
250     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
251 
252     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
253     for (i=0;i<n_constraints;i++) {
254       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
255       /* Get row of constraint matrix in R numbering */
256       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
257       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
258       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
260       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
261       /* Solve for row of constraint matrix in R numbering */
262       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
263       /* Set values in local_auxmat2 */
264       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
265       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
266       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267     }
268     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
271 
272     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
273     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
274     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
275     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
276     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
277     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(M1);CHKERRQ(ierr);
279     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
280     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
281     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
282     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
283     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
284     ierr = MatDestroy(&M2);CHKERRQ(ierr);
285     ierr = MatDestroy(&M3);CHKERRQ(ierr);
286     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
287     if (!pcbddc->local_auxmat1) {
288       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
289     } else {
290       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     }
292   }
293 
294   /* Get submatrices from subdomain matrix */
295   if (n_vertices) {
296     PetscInt ibs,mbs;
297     PetscBool issbaij;
298     Mat newmat;
299 
300     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
301     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
302     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
303     if (ibs != mbs) { /* need to convert to SEQAIJ */
304       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
308       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
309     } else {
310       /* this is safe */
311       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
313       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
314         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
315         /* which of the two approaches is faster? */
316         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
317         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
318         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
320         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
321       } else {
322         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
323         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324       }
325     }
326     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
327     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
328     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
329   }
330 
331   /* Matrix of coarse basis functions (local) */
332   if (pcbddc->coarse_phi_B) {
333     PetscInt on_B,on_primal;
334     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
335     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
336       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
337       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
338     }
339   }
340   if (pcbddc->coarse_phi_D) {
341     PetscInt on_D,on_primal;
342     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
343     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
344       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
345       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
346     }
347   }
348   if (!pcbddc->coarse_phi_B) {
349     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
350     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
351     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
352     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
353   }
354   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
355     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
356     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
357     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
358     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
359   }
360 
361   if (pcbddc->dbg_flag) {
362     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
363     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
364     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
365   }
366   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
367   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
368 
369   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
370 
371   /* vertices */
372   for (i=0;i<n_vertices;i++) {
373     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
374     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
375     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
376     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
377     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
378     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
379     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
380     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
383     if (n_constraints) {
384       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
385       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
387     }
388     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
389     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
390 
391     /* Set values in coarse basis function and subdomain part of coarse_mat */
392     /* coarse basis functions */
393     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
394     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
397     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
398     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
400     if (pcbddc->switch_static || pcbddc->dbg_flag) {
401       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
404       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
405       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406     }
407     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
408     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
409     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
410     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     if (n_constraints) {
412       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
413       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
414       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     }
416 
417     /* check */
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on local nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
425       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
426       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
427       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
428       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
429       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       if (n_constraints) {
433         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       }
437       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
440       /* check saddle point solution */
441       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
444       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
445       /* shift by the identity matrix */
446       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
447       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
450     }
451   }
452 
453   /* constraints */
454   for (i=0;i<n_constraints;i++) {
455     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
456     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
457     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
458     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
459     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
460     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
461     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
462     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
463     if (n_vertices) {
464       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
465     }
466     /* Set values in coarse basis function and subdomain part of coarse_mat */
467     /* coarse basis functions */
468     j = i+n_vertices; /* don't touch this! */
469     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
470     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
473     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
474     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
475     if (pcbddc->switch_static || pcbddc->dbg_flag) {
476       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
479       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
481     }
482     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
483     if (n_vertices) {
484       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
485       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
487     }
488     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
489     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
490     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
491 
492     if (pcbddc->dbg_flag) {
493       /* assemble subdomain vector on nodes */
494       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
495       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
496       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
497       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
498       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
499       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
500       /* assemble subdomain vector of lagrange multipliers */
501       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
502       if (n_vertices) {
503         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
504         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
505         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
506       }
507       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
508       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
509       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
510       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
513       /* check saddle point solution */
514       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
517       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
518       /* shift by the identity matrix */
519       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
520       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
523     }
524   }
525   /* call assembling routines for local coarse basis */
526   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   if (pcbddc->switch_static || pcbddc->dbg_flag) {
529     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   }
532 
533   /* compute other basis functions for non-symmetric problems */
534   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
535   if (!setsym || (setsym && !issym)) {
536     if (!pcbddc->coarse_psi_B) {
537       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
538       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
539       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
540       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
541     }
542     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
543       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
544       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
545       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
546       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
547     }
548     for (i=0;i<pcbddc->local_primal_size;i++) {
549       if (n_constraints) {
550         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
551         for (j=0;j<n_constraints;j++) {
552           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
553         }
554         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
555         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
556       }
557       if (i<n_vertices) {
558         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
559         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
560         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
561         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
562         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
563         if (n_constraints) {
564           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
565         }
566       } else {
567         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
568       }
569       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
570       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
571       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
574       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
575       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
576       if (i<n_vertices) {
577         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
578       }
579       if (pcbddc->switch_static || pcbddc->dbg_flag) {
580         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
583         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
584         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
585       }
586 
587       if (pcbddc->dbg_flag) {
588         /* assemble subdomain vector on nodes */
589         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
590         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
591         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
592         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
593         if (i<n_vertices) {
594           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
595         }
596         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
597         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
598         /* assemble subdomain vector of lagrange multipliers */
599         for (j=0;j<pcbddc->local_primal_size;j++) {
600           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
601         }
602         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
603         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
604         /* check saddle point solution */
605         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
608         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
609         /* shift by the identity matrix */
610         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
611         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
614       }
615     }
616     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     if (pcbddc->switch_static || pcbddc->dbg_flag) {
619       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     }
622     unsymmetric_check = PETSC_TRUE;
623   } else { /* take references to already computed coarse basis */
624     unsymmetric_check = PETSC_FALSE;
625     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
626     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
627     if (pcbddc->coarse_phi_D) {
628       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
629       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
630     }
631   }
632   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
633   /* Checking coarse_sub_mat and coarse basis functios */
634   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
635   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636   if (pcbddc->dbg_flag) {
637     Mat         coarse_sub_mat;
638     Mat         AUXMAT,TM1,TM2,TM3,TM4;
639     Mat         coarse_phi_D,coarse_phi_B;
640     Mat         coarse_psi_D,coarse_psi_B;
641     Mat         A_II,A_BB,A_IB,A_BI;
642     MatType     checkmattype=MATSEQAIJ;
643     PetscReal   real_value;
644 
645     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
646     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
647     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
648     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
649     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
650     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
651     if (unsymmetric_check) {
652       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
653       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
654     }
655     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
656 
657     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
658     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
659     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
660     if (unsymmetric_check) {
661       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
662       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
663       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
664       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
665       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
666       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
667       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
668       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
669       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
670       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
671       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
672       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
673     } else {
674       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
675       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
676       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
677       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
678       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
679       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
680       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
681       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
682     }
683     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
684     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
685     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
686     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
687     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
688     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
689     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
690     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
691     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
692     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
693     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
694     for (i=0;i<pcbddc->local_primal_size;i++) {
695       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
696     }
697     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
698     for (i=0;i<pcbddc->local_primal_size;i++) {
699       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
700     }
701     if (unsymmetric_check) {
702       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
703       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
704         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
705       }
706       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
707       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
708         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
709       }
710     }
711     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
712     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
713     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
714     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
715     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
716     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
717     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
718     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
719     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
720     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
721     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
722     if (unsymmetric_check) {
723       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
724       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
725     }
726     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
727     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
728     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
729     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
730   }
731   /* free memory */
732   if (n_vertices) {
733     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
734     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
735     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
737     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
738   }
739   if (n_constraints) {
740     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
741     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
742     ierr = MatDestroy(&M1);CHKERRQ(ierr);
743     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
744   }
745   ierr = PetscFree(auxindices);CHKERRQ(ierr);
746   /* get back data */
747   *coarse_submat_vals_n = coarse_submat_vals;
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
753 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
754 {
755   PC_IS*            pcis = (PC_IS*)(pc->data);
756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
758   PetscBool         issbaij,isseqaij;
759   /* manage repeated solves */
760   MatReuse          reuse;
761   PetscErrorCode    ierr;
762 
763   PetscFunctionBegin;
764   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
765     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
766   }
767   /* get mat flags */
768   reuse = MAT_INITIAL_MATRIX;
769   if (pc->setupcalled) {
770     if (pc->flag == SAME_NONZERO_PATTERN) {
771       reuse = MAT_REUSE_MATRIX;
772     } else {
773       reuse = MAT_INITIAL_MATRIX;
774     }
775   }
776   if (reuse == MAT_INITIAL_MATRIX) {
777     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
778     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
779     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
780     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
781     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
782   }
783 
784   /* transform local matrices if needed */
785   if (pcbddc->use_change_of_basis) {
786     Mat         change_mat_all;
787     PetscScalar *row_cmat_values;
788     PetscInt    *row_cmat_indices;
789     PetscInt    *nnz,*is_indices,*temp_indices;
790     PetscInt    i,j,k,n_D,n_B;
791 
792     /* Get Non-overlapping dimensions */
793     n_B = pcis->n_B;
794     n_D = pcis->n-n_B;
795 
796     /* compute nonzero structure of change of basis on all local nodes */
797     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
798     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
799     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
800     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
801     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
802     k=1;
803     for (i=0;i<n_B;i++) {
804       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
805       nnz[is_indices[i]]=j;
806       if (k < j) k = j;
807       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
808     }
809     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
810     /* assemble change of basis matrix on the whole set of local dofs */
811     ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr);
812     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
813     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
814     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
815     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
816     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
817     for (i=0;i<n_D;i++) {
818       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
819     }
820     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
821     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
822     for (i=0;i<n_B;i++) {
823       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
824       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
825       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
826       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
827     }
828     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
831     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
832     if (isseqaij) {
833       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
834     } else {
835       Mat work_mat;
836       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
837       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
838       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
839     }
840     /*
841     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
842     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
843     */
844     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
845     ierr = PetscFree(nnz);CHKERRQ(ierr);
846     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
847   } else {
848     /* without change of basis, the local matrix is unchanged */
849     if (!pcbddc->local_mat) {
850       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
851       pcbddc->local_mat = matis->A;
852     }
853   }
854 
855   /* get submatrices */
856   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
857   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
858   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
859   if (!issbaij) {
860     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
861     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
862   } else {
863     Mat newmat;
864     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
865     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
866     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
867     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
868   }
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
874 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
875 {
876   PC_IS*         pcis = (PC_IS*)(pc->data);
877   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
878   IS             is_aux1,is_aux2;
879   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
880   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
881   PetscInt       vbs,bs;
882   PetscBT        bitmask;
883   PetscErrorCode ierr;
884 
885   PetscFunctionBegin;
886   /*
887     No need to setup local scatters if
888       - primal space is unchanged
889         AND
890       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
891         AND
892       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
893   */
894   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
895     PetscFunctionReturn(0);
896   }
897   /* destroy old objects */
898   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
899   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
900   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
901   /* Set Non-overlapping dimensions */
902   n_B = pcis->n_B; n_D = pcis->n - n_B;
903   n_vertices = pcbddc->n_actual_vertices;
904   /* create auxiliary bitmask */
905   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
906   for (i=0;i<n_vertices;i++) {
907     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
908   }
909 
910   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
911   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
912   for (i=0, n_R=0; i<pcis->n; i++) {
913     if (!PetscBTLookup(bitmask,i)) {
914       idx_R_local[n_R] = i;
915       n_R++;
916     }
917   }
918 
919   /* Block code */
920   vbs = 1;
921   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
922   if (bs>1 && !(n_vertices%bs)) {
923     PetscBool is_blocked = PETSC_TRUE;
924     PetscInt  *vary;
925     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
926     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
927     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
928     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
929     for (i=0; i<n_vertices; i++) {
930       if (vary[i]!=0 && vary[i]!=bs) {
931         is_blocked = PETSC_FALSE;
932         break;
933       }
934     }
935     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
936       vbs = bs;
937       for (i=0;i<n_R/vbs;i++) {
938         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
939       }
940     }
941     ierr = PetscFree(vary);CHKERRQ(ierr);
942   }
943   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
944   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
945 
946   /* print some info if requested */
947   if (pcbddc->dbg_flag) {
948     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
949     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
950     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
951     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
952     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
953     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
954     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
955     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
956   }
957 
958   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
959   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
960   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
961   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
962   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
963   for (i=0; i<n_D; i++) {
964     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
965   }
966   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
967   for (i=0, j=0; i<n_R; i++) {
968     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
969       aux_array1[j++] = i;
970     }
971   }
972   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
973   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
974   for (i=0, j=0; i<n_B; i++) {
975     if (!PetscBTLookup(bitmask,is_indices[i])) {
976       aux_array2[j++] = i;
977     }
978   }
979   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
980   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
981   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
982   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
983   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
984 
985   if (pcbddc->switch_static || pcbddc->dbg_flag) {
986     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
987     for (i=0, j=0; i<n_R; i++) {
988       if (PetscBTLookup(bitmask,idx_R_local[i])) {
989         aux_array1[j++] = i;
990       }
991     }
992     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
993     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
994     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
995   }
996   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
997   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1004 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1005 {
1006   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1007   PC_IS          *pcis = (PC_IS*)pc->data;
1008   PC             pc_temp;
1009   Mat            A_RR;
1010   MatReuse       reuse;
1011   PetscScalar    m_one = -1.0;
1012   PetscReal      value;
1013   PetscInt       n_D,n_R,ibs,mbs;
1014   PetscBool      use_exact,use_exact_reduced,issbaij;
1015   PetscErrorCode ierr;
1016   /* prefixes stuff */
1017   char           dir_prefix[256],neu_prefix[256],str_level[16];
1018   size_t         len;
1019 
1020   PetscFunctionBegin;
1021 
1022   /* compute prefixes */
1023   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1024   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1025   if (!pcbddc->current_level) {
1026     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1027     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1028     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1029     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1030   } else {
1031     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1032     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1033     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1034     len -= 15; /* remove "pc_bddc_coarse_" */
1035     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1036     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1037     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1038     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1039     *(dir_prefix+len)='\0';
1040     *(neu_prefix+len)='\0';
1041     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1042     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1043     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1044     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1045   }
1046 
1047   /* DIRICHLET PROBLEM */
1048   /* Matrix for Dirichlet problem is pcis->A_II */
1049   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1050   if (!pcbddc->ksp_D) { /* create object if not yet build */
1051     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1052     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1053     /* default */
1054     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1055     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1056     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1057     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1058     if (issbaij) {
1059       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1060     } else {
1061       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1062     }
1063     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1064     /* Allow user's customization */
1065     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1066   }
1067   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1068   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1069   if (!n_D) {
1070     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1071     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1072   }
1073   /* Set Up KSP for Dirichlet problem of BDDC */
1074   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1075   /* set ksp_D into pcis data */
1076   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1077   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1078   pcis->ksp_D = pcbddc->ksp_D;
1079 
1080   /* NEUMANN PROBLEM */
1081   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1082   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1083   if (pcbddc->ksp_R) { /* already created ksp */
1084     PetscInt nn_R;
1085     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1086     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1087     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1088     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1089       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1090       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1091       reuse = MAT_INITIAL_MATRIX;
1092     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1093       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1094         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1095         reuse = MAT_INITIAL_MATRIX;
1096       } else { /* safe to reuse the matrix */
1097         reuse = MAT_REUSE_MATRIX;
1098       }
1099     }
1100     /* last check */
1101     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1102       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1103       reuse = MAT_INITIAL_MATRIX;
1104     }
1105   } else { /* first time, so we need to create the matrix */
1106     reuse = MAT_INITIAL_MATRIX;
1107   }
1108   /* extract A_RR */
1109   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1110   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1111   if (ibs != mbs) {
1112     Mat newmat;
1113     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1114     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1115     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1116   } else {
1117     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1118   }
1119   if (!pcbddc->ksp_R) { /* create object if not present */
1120     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1121     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1122     /* default */
1123     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1124     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1125     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1126     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1127     if (issbaij) {
1128       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1129     } else {
1130       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1131     }
1132     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1133     /* Allow user's customization */
1134     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1135   }
1136   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1137   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1138   if (!n_R) {
1139     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1140     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1141   }
1142   /* Set Up KSP for Neumann problem of BDDC */
1143   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1144 
1145   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1146   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1147     /* Dirichlet */
1148     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1149     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1150     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1151     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1152     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1153     /* need to be adapted? */
1154     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1155     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1156     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1157     /* print info */
1158     if (pcbddc->dbg_flag) {
1159       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1160       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1161       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1162       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1163       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1164       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1165     }
1166     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1167       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1168     }
1169 
1170     /* Neumann */
1171     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1172     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1173     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1174     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1175     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1176     /* need to be adapted? */
1177     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1178     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1179     /* print info */
1180     if (pcbddc->dbg_flag) {
1181       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1182       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1183     }
1184     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1185       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1186     }
1187   }
1188   /* free Neumann problem's matrix */
1189   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1195 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1196 {
1197   PetscErrorCode ierr;
1198   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1199 
1200   PetscFunctionBegin;
1201   if (applytranspose) {
1202     if (pcbddc->local_auxmat1) {
1203       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1204       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1205     }
1206     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1207   } else {
1208     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1209     if (pcbddc->local_auxmat1) {
1210       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1211       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1212     }
1213   }
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1218 #undef __FUNCT__
1219 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1220 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1221 {
1222   PetscErrorCode ierr;
1223   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1224   PC_IS*            pcis = (PC_IS*)  (pc->data);
1225   const PetscScalar zero = 0.0;
1226 
1227   PetscFunctionBegin;
1228   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1229   if (applytranspose) {
1230     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1231     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1232   } else {
1233     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1234     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1235   }
1236   /* start communications from local primal nodes to rhs of coarse solver */
1237   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1238   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240 
1241   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1242   /* TODO remove null space when doing multilevel */
1243   if (pcbddc->coarse_ksp) {
1244     if (applytranspose) {
1245       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1246     } else {
1247       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1248     }
1249   }
1250   /* start communications from coarse solver solution to local primal nodes */
1251   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1252 
1253   /* Local solution on R nodes */
1254   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1255   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1256   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1257   if (pcbddc->switch_static) {
1258     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1259     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1260   }
1261   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1262   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1263   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265   if (pcbddc->switch_static) {
1266     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268   }
1269 
1270   /* complete communications from coarse sol to local primal nodes */
1271   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1272 
1273   /* Sum contributions from two levels */
1274   if (applytranspose) {
1275     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1276     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1277   } else {
1278     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1279     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1285 #undef __FUNCT__
1286 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1287 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1288 {
1289   PetscErrorCode ierr;
1290   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1291   PetscScalar    *array,*array2;
1292   Vec            from,to;
1293 
1294   PetscFunctionBegin;
1295   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1296     from = pcbddc->coarse_vec;
1297     to = pcbddc->vec1_P;
1298     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1299       Vec tvec;
1300       PetscInt lsize;
1301       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1302       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1303       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1304       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1305       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1306       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1307       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1308     }
1309   } else { /* from local to global -> put data in coarse right hand side */
1310     from = pcbddc->vec1_P;
1311     to = pcbddc->coarse_vec;
1312   }
1313   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1319 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1320 {
1321   PetscErrorCode ierr;
1322   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1323   PetscScalar    *array,*array2;
1324   Vec            from,to;
1325 
1326   PetscFunctionBegin;
1327   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1328     from = pcbddc->coarse_vec;
1329     to = pcbddc->vec1_P;
1330   } else { /* from local to global -> put data in coarse right hand side */
1331     from = pcbddc->vec1_P;
1332     to = pcbddc->coarse_vec;
1333   }
1334   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1335   if (smode == SCATTER_FORWARD) {
1336     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1337       Vec tvec;
1338       PetscInt lsize;
1339       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1340       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1341       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1342       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1343       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1344       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1345       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1346     }
1347   }
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 /* uncomment for testing purposes */
1352 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1353 #undef __FUNCT__
1354 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1355 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1356 {
1357   PetscErrorCode    ierr;
1358   PC_IS*            pcis = (PC_IS*)(pc->data);
1359   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1360   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1361   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1362   MatType           impMatType=MATSEQAIJ;
1363   /* one and zero */
1364   PetscScalar       one=1.0,zero=0.0;
1365   /* space to store constraints and their local indices */
1366   PetscScalar       *temp_quadrature_constraint;
1367   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1368   /* iterators */
1369   PetscInt          i,j,k,total_counts,temp_start_ptr;
1370   /* stuff to store connected components stored in pcbddc->mat_graph */
1371   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1372   PetscInt          n_ISForFaces,n_ISForEdges;
1373   /* near null space stuff */
1374   MatNullSpace      nearnullsp;
1375   const Vec         *nearnullvecs;
1376   Vec               *localnearnullsp;
1377   PetscBool         nnsp_has_cnst;
1378   PetscInt          nnsp_size;
1379   PetscScalar       *array;
1380   /* BLAS integers */
1381   PetscBLASInt      lwork,lierr;
1382   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1383   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1384   /* LAPACK working arrays for SVD or POD */
1385   PetscBool         skip_lapack;
1386   PetscScalar       *work;
1387   PetscReal         *singular_vals;
1388 #if defined(PETSC_USE_COMPLEX)
1389   PetscReal         *rwork;
1390 #endif
1391 #if defined(PETSC_MISSING_LAPACK_GESVD)
1392   PetscBLASInt      Blas_one_2=1;
1393   PetscScalar       *temp_basis,*correlation_mat;
1394 #else
1395   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1396   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1397 #endif
1398   /* reuse */
1399   PetscInt          olocal_primal_size;
1400   PetscInt          *oprimal_indices_local_idxs;
1401   /* change of basis */
1402   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1403   PetscBool         boolforchange,qr_needed;
1404   PetscBT           touched,change_basis,qr_needed_idx;
1405   /* auxiliary stuff */
1406   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1407   /* some quantities */
1408   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1409   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1410 
1411 
1412   PetscFunctionBegin;
1413   /* Destroy Mat objects computed previously */
1414   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1415   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1416   /* Get index sets for faces, edges and vertices from graph */
1417   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1418     pcbddc->use_vertices = PETSC_TRUE;
1419   }
1420   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1421   /* HACKS (the two following code branches) */
1422   if (!ISForVertices && pcbddc->NullSpace) {
1423     pcbddc->use_change_of_basis = PETSC_TRUE;
1424     pcbddc->use_change_on_faces = PETSC_FALSE;
1425   }
1426   if (pcbddc->NullSpace) {
1427     /* use_change_of_basis should be consistent among processors */
1428     PetscBool tbool = pcbddc->use_change_of_basis;
1429     ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1430   }
1431   /* print some info */
1432   if (pcbddc->dbg_flag) {
1433     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1434     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1435     i = 0;
1436     if (ISForVertices) {
1437       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1438     }
1439     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1440     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1441     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1442     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1443   }
1444   /* check if near null space is attached to global mat */
1445   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1446   if (nearnullsp) {
1447     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1448     /* remove any stored info */
1449     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1450     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1451     /* store information for BDDC solver reuse */
1452     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1453     pcbddc->onearnullspace = nearnullsp;
1454     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1455     for (i=0;i<nnsp_size;i++) {
1456       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1457     }
1458   } else { /* if near null space is not provided BDDC uses constants by default */
1459     nnsp_size = 0;
1460     nnsp_has_cnst = PETSC_TRUE;
1461   }
1462   /* get max number of constraints on a single cc */
1463   max_constraints = nnsp_size;
1464   if (nnsp_has_cnst) max_constraints++;
1465 
1466   /*
1467        Evaluate maximum storage size needed by the procedure
1468        - temp_indices will contain start index of each constraint stored as follows
1469        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1470        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1471        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1472                                                                                                                                                          */
1473   total_counts = n_ISForFaces+n_ISForEdges;
1474   total_counts *= max_constraints;
1475   n_vertices = 0;
1476   if (ISForVertices) {
1477     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1478   }
1479   total_counts += n_vertices;
1480   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1481   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1482   total_counts = 0;
1483   max_size_of_constraint = 0;
1484   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1485     if (i<n_ISForEdges) {
1486       used_IS = &ISForEdges[i];
1487     } else {
1488       used_IS = &ISForFaces[i-n_ISForEdges];
1489     }
1490     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1491     total_counts += j;
1492     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1493   }
1494   total_counts *= max_constraints;
1495   total_counts += n_vertices;
1496   ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr);
1497   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr);
1498   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1499   /* get local part of global near null space vectors */
1500   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1501   for (k=0;k<nnsp_size;k++) {
1502     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1503     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505   }
1506 
1507   /* whether or not to skip lapack calls */
1508   skip_lapack = PETSC_TRUE;
1509   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1510 
1511   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1512   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1513     PetscScalar temp_work;
1514 #if defined(PETSC_MISSING_LAPACK_GESVD)
1515     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1516     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1517     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1518     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1519 #if defined(PETSC_USE_COMPLEX)
1520     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1521 #endif
1522     /* now we evaluate the optimal workspace using query with lwork=-1 */
1523     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1524     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1525     lwork = -1;
1526     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1527 #if !defined(PETSC_USE_COMPLEX)
1528     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1529 #else
1530     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1531 #endif
1532     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1533     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1534 #else /* on missing GESVD */
1535     /* SVD */
1536     PetscInt max_n,min_n;
1537     max_n = max_size_of_constraint;
1538     min_n = max_constraints;
1539     if (max_size_of_constraint < max_constraints) {
1540       min_n = max_size_of_constraint;
1541       max_n = max_constraints;
1542     }
1543     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1544 #if defined(PETSC_USE_COMPLEX)
1545     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1546 #endif
1547     /* now we evaluate the optimal workspace using query with lwork=-1 */
1548     lwork = -1;
1549     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1550     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1551     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1552     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1553 #if !defined(PETSC_USE_COMPLEX)
1554     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1555 #else
1556     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1557 #endif
1558     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1559     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1560 #endif /* on missing GESVD */
1561     /* Allocate optimal workspace */
1562     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1563     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1564   }
1565   /* Now we can loop on constraining sets */
1566   total_counts = 0;
1567   temp_indices[0] = 0;
1568   /* vertices */
1569   if (ISForVertices) {
1570     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1571     if (nnsp_has_cnst) { /* consider all vertices */
1572       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1573       for (i=0;i<n_vertices;i++) {
1574         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1575         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1576         total_counts++;
1577       }
1578     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1579       PetscBool used_vertex;
1580       for (i=0;i<n_vertices;i++) {
1581         used_vertex = PETSC_FALSE;
1582         k = 0;
1583         while (!used_vertex && k<nnsp_size) {
1584           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1585           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1586             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1587             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1588             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1589             total_counts++;
1590             used_vertex = PETSC_TRUE;
1591           }
1592           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1593           k++;
1594         }
1595       }
1596     }
1597     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1598     n_vertices = total_counts;
1599   }
1600 
1601   /* edges and faces */
1602   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1603     if (i<n_ISForEdges) {
1604       used_IS = &ISForEdges[i];
1605       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1606     } else {
1607       used_IS = &ISForFaces[i-n_ISForEdges];
1608       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1609     }
1610     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1611     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1612     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1613     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1614     /* change of basis should not be performed on local periodic nodes */
1615     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1616     if (nnsp_has_cnst) {
1617       PetscScalar quad_value;
1618       temp_constraints++;
1619       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1620       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1621       for (j=0;j<size_of_constraint;j++) {
1622         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1623       }
1624       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1625       total_counts++;
1626     }
1627     for (k=0;k<nnsp_size;k++) {
1628       PetscReal real_value;
1629       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1630       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1631       for (j=0;j<size_of_constraint;j++) {
1632         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1633       }
1634       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1635       /* check if array is null on the connected component */
1636       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1637       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1638       if (real_value > 0.0) { /* keep indices and values */
1639         temp_constraints++;
1640         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1641         total_counts++;
1642       }
1643     }
1644     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1645     valid_constraints = temp_constraints;
1646     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1647     if (!pcbddc->use_nnsp_true && temp_constraints) {
1648       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1649 
1650 #if defined(PETSC_MISSING_LAPACK_GESVD)
1651       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1652          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1653          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1654             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1655             from that computed using LAPACKgesvd
1656          -> This is due to a different computation of eigenvectors in LAPACKheev
1657          -> The quality of the POD-computed basis will be the same */
1658       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1659       /* Store upper triangular part of correlation matrix */
1660       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1661       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1662       for (j=0;j<temp_constraints;j++) {
1663         for (k=0;k<j+1;k++) {
1664           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1665         }
1666       }
1667       /* compute eigenvalues and eigenvectors of correlation matrix */
1668       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1669       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1670 #if !defined(PETSC_USE_COMPLEX)
1671       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1672 #else
1673       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1674 #endif
1675       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1676       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1677       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1678       j=0;
1679       while (j < temp_constraints && singular_vals[j] < tol) j++;
1680       total_counts=total_counts-j;
1681       valid_constraints = temp_constraints-j;
1682       /* scale and copy POD basis into used quadrature memory */
1683       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1684       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1685       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1686       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1687       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1688       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1689       if (j<temp_constraints) {
1690         PetscInt ii;
1691         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1692         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1693         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1694         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1695         for (k=0;k<temp_constraints-j;k++) {
1696           for (ii=0;ii<size_of_constraint;ii++) {
1697             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1698           }
1699         }
1700       }
1701 #else  /* on missing GESVD */
1702       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1703       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1704       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1705       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1706 #if !defined(PETSC_USE_COMPLEX)
1707       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1708 #else
1709       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1710 #endif
1711       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1712       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1713       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1714       k = temp_constraints;
1715       if (k > size_of_constraint) k = size_of_constraint;
1716       j = 0;
1717       while (j < k && singular_vals[k-j-1] < tol) j++;
1718       total_counts = total_counts-temp_constraints+k-j;
1719       valid_constraints = k-j;
1720 #endif /* on missing GESVD */
1721     }
1722     /* setting change_of_basis flag is safe now */
1723     if (boolforchange) {
1724       for (j=0;j<valid_constraints;j++) {
1725         PetscBTSet(change_basis,total_counts-j-1);
1726       }
1727     }
1728   }
1729   /* free index sets of faces, edges and vertices */
1730   for (i=0;i<n_ISForFaces;i++) {
1731     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1732   }
1733   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1734   for (i=0;i<n_ISForEdges;i++) {
1735     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1736   }
1737   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1738   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1739   /* map temp_indices_to_constraint in boundary numbering */
1740   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1741   if (i != temp_indices[total_counts]) {
1742     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1743   }
1744 
1745   /* free workspace */
1746   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1747     ierr = PetscFree(work);CHKERRQ(ierr);
1748 #if defined(PETSC_USE_COMPLEX)
1749     ierr = PetscFree(rwork);CHKERRQ(ierr);
1750 #endif
1751     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1752 #if defined(PETSC_MISSING_LAPACK_GESVD)
1753     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1754     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1755 #endif
1756   }
1757   for (k=0;k<nnsp_size;k++) {
1758     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1759   }
1760   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1761 
1762   /* set quantities in pcbddc data structure and store previous primal size */
1763   /* n_vertices defines the number of subdomain corners in the primal space */
1764   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1765   olocal_primal_size = pcbddc->local_primal_size;
1766   pcbddc->local_primal_size = total_counts;
1767   pcbddc->n_vertices = n_vertices;
1768   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1769 
1770   /* Create constraint matrix */
1771   /* The constraint matrix is used to compute the l2g map of primal dofs */
1772   /* so we need to set it up properly either with or without change of basis */
1773   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1774   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1775   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1776   /* array to compute a local numbering of constraints : vertices first then constraints */
1777   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1778   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1779   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1780   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1781   /* auxiliary stuff for basis change */
1782   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1783   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1784 
1785   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1786   total_primal_vertices=0;
1787   for (i=0;i<pcbddc->local_primal_size;i++) {
1788     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1789     if (size_of_constraint == 1) {
1790       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1791       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1792       aux_primal_minloc[total_primal_vertices]=0;
1793       total_primal_vertices++;
1794     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1795       PetscInt min_loc,min_index;
1796       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1797       /* find first untouched local node */
1798       k = 0;
1799       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1800       min_index = global_indices[k];
1801       min_loc = k;
1802       /* search the minimum among global nodes already untouched on the cc */
1803       for (k=1;k<size_of_constraint;k++) {
1804         /* there can be more than one constraint on a single connected component */
1805         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1806           min_index = global_indices[k];
1807           min_loc = k;
1808         }
1809       }
1810       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1811       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1812       aux_primal_minloc[total_primal_vertices]=min_loc;
1813       total_primal_vertices++;
1814     }
1815   }
1816   /* determine if a QR strategy is needed for change of basis */
1817   qr_needed = PETSC_FALSE;
1818   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1819   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1820     if (PetscBTLookup(change_basis,i)) {
1821       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1822       j = 0;
1823       for (k=0;k<size_of_constraint;k++) {
1824         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1825           j++;
1826         }
1827       }
1828       /* found more than one primal dof on the cc */
1829       if (j > 1) {
1830         PetscBTSet(qr_needed_idx,i);
1831         qr_needed = PETSC_TRUE;
1832       }
1833     }
1834   }
1835   /* free workspace */
1836   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1837 
1838   /* permute indices in order to have a sorted set of vertices */
1839   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1840 
1841   /* nonzero structure of constraint matrix */
1842   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1843   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1844   j=total_primal_vertices;
1845   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1846     if (!PetscBTLookup(change_basis,i)) {
1847       nnz[j]=temp_indices[i+1]-temp_indices[i];
1848       j++;
1849     }
1850   }
1851   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1852   ierr = PetscFree(nnz);CHKERRQ(ierr);
1853   /* set values in constraint matrix */
1854   for (i=0;i<total_primal_vertices;i++) {
1855     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1856   }
1857   total_counts = total_primal_vertices;
1858   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1859     if (!PetscBTLookup(change_basis,i)) {
1860       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1861       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1862       total_counts++;
1863     }
1864   }
1865   /* assembling */
1866   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1867   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1868   /*
1869   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1870   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1871   */
1872   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1873   if (pcbddc->use_change_of_basis) {
1874     /* dual and primal dofs on a single cc */
1875     PetscInt     dual_dofs,primal_dofs;
1876     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1877     PetscInt     primal_counter;
1878     /* working stuff for GEQRF */
1879     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1880     PetscBLASInt lqr_work;
1881     /* working stuff for UNGQR */
1882     PetscScalar  *gqr_work,lgqr_work_t;
1883     PetscBLASInt lgqr_work;
1884     /* working stuff for TRTRS */
1885     PetscScalar  *trs_rhs;
1886     PetscBLASInt Blas_NRHS;
1887     /* pointers for values insertion into change of basis matrix */
1888     PetscInt     *start_rows,*start_cols;
1889     PetscScalar  *start_vals;
1890     /* working stuff for values insertion */
1891     PetscBT      is_primal;
1892 
1893     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1894     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1895     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1896     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1897     /* work arrays */
1898     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1899     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1900     /* nonzeros per row */
1901     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1902       if (PetscBTLookup(change_basis,i)) {
1903         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1904         if (PetscBTLookup(qr_needed_idx,i)) {
1905           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1906         } else {
1907           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1908           /* get local primal index on the cc */
1909           j = 0;
1910           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1911           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1912         }
1913       }
1914     }
1915     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1916     ierr = PetscFree(nnz);CHKERRQ(ierr);
1917     /* Set initial identity in the matrix */
1918     for (i=0;i<pcis->n_B;i++) {
1919       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1920     }
1921 
1922     if (pcbddc->dbg_flag) {
1923       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1924       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1925     }
1926 
1927 
1928     /* Now we loop on the constraints which need a change of basis */
1929     /*
1930        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1931        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1932 
1933        Basic blocks of change of basis matrix T computed
1934 
1935           - Using the following block transformation if there is only a primal dof on the cc
1936             (in the example, primal dof is the last one of the edge in LOCAL ordering
1937              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1938             | 1        0   ...        0     1 |
1939             | 0        1   ...        0     1 |
1940             |              ...                |
1941             | 0        ...            1     1 |
1942             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1943 
1944           - via QR decomposition of constraints otherwise
1945     */
1946     if (qr_needed) {
1947       /* space to store Q */
1948       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
1949       /* first we issue queries for optimal work */
1950       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1951       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1952       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1953       lqr_work = -1;
1954       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1955       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1956       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1957       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
1958       lgqr_work = -1;
1959       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1960       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1961       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1962       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1963       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1964       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1965       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1966       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1967       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
1968       /* array to store scaling factors for reflectors */
1969       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
1970       /* array to store rhs and solution of triangular solver */
1971       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
1972       /* allocating workspace for check */
1973       if (pcbddc->dbg_flag) {
1974         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
1975       }
1976     }
1977     /* array to store whether a node is primal or not */
1978     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1979     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
1980     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1981     if (i != total_primal_vertices) {
1982       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1983     }
1984     for (i=0;i<total_primal_vertices;i++) {
1985       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1986     }
1987     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1988 
1989     /* loop on constraints and see whether or not they need a change of basis and compute it */
1990     /* -> using implicit ordering contained in temp_indices data */
1991     total_counts = pcbddc->n_vertices;
1992     primal_counter = total_counts;
1993     while (total_counts<pcbddc->local_primal_size) {
1994       primal_dofs = 1;
1995       if (PetscBTLookup(change_basis,total_counts)) {
1996         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1997         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1998           primal_dofs++;
1999         }
2000         /* get constraint info */
2001         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2002         dual_dofs = size_of_constraint-primal_dofs;
2003 
2004         if (pcbddc->dbg_flag) {
2005           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2006         }
2007 
2008         if (primal_dofs > 1) { /* QR */
2009 
2010           /* copy quadrature constraints for change of basis check */
2011           if (pcbddc->dbg_flag) {
2012             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2013           }
2014           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2015           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2016 
2017           /* compute QR decomposition of constraints */
2018           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2019           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2020           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2021           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2022           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2023           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2024           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2025 
2026           /* explictly compute R^-T */
2027           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2028           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2029           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2030           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2031           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2032           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2033           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2034           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2035           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2036           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2037 
2038           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2039           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2040           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2041           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2042           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2043           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2044           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2045           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2046           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2047 
2048           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2049              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2050              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2051           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2052           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2053           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2054           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2055           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2056           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2057           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2058           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2059           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2060           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2061 
2062           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2063           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2064           /* insert cols for primal dofs */
2065           for (j=0;j<primal_dofs;j++) {
2066             start_vals = &qr_basis[j*size_of_constraint];
2067             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2068             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2069           }
2070           /* insert cols for dual dofs */
2071           for (j=0,k=0;j<dual_dofs;k++) {
2072             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2073               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2074               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2075               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2076               j++;
2077             }
2078           }
2079 
2080           /* check change of basis */
2081           if (pcbddc->dbg_flag) {
2082             PetscInt   ii,jj;
2083             PetscBool valid_qr=PETSC_TRUE;
2084             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2085             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2086             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2087             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2088             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2089             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2090             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2091             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2092             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2093             for (jj=0;jj<size_of_constraint;jj++) {
2094               for (ii=0;ii<primal_dofs;ii++) {
2095                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2096                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2097               }
2098             }
2099             if (!valid_qr) {
2100               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2101               for (jj=0;jj<size_of_constraint;jj++) {
2102                 for (ii=0;ii<primal_dofs;ii++) {
2103                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2104                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2105                   }
2106                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2107                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2108                   }
2109                 }
2110               }
2111             } else {
2112               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2113             }
2114           }
2115         } else { /* simple transformation block */
2116           PetscInt row,col;
2117           PetscScalar val;
2118           for (j=0;j<size_of_constraint;j++) {
2119             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2120             if (!PetscBTLookup(is_primal,row)) {
2121               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2122               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2123               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2124             } else {
2125               for (k=0;k<size_of_constraint;k++) {
2126                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2127                 if (row != col) {
2128                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2129                 } else {
2130                   val = 1.0;
2131                 }
2132                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2133               }
2134             }
2135           }
2136           if (pcbddc->dbg_flag) {
2137             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2138           }
2139         }
2140         /* increment primal counter */
2141         primal_counter += primal_dofs;
2142       } else {
2143         if (pcbddc->dbg_flag) {
2144           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2145         }
2146       }
2147       /* increment constraint counter total_counts */
2148       total_counts += primal_dofs;
2149     }
2150 
2151     /* free workspace */
2152     if (qr_needed) {
2153       if (pcbddc->dbg_flag) {
2154         ierr = PetscFree(work);CHKERRQ(ierr);
2155       }
2156       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2157       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2158       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2159       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2160       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2161     }
2162     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2163     /* assembling */
2164     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2166     /*
2167     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2168     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2169     */
2170   }
2171 
2172   /* get indices in local ordering for vertices and constraints */
2173   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2174     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2175     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2176   }
2177   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2178   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2179   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2180   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2181   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2182   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2183   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2184   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2185   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2186   /* set quantities in PCBDDC data struct */
2187   pcbddc->n_actual_vertices = i;
2188   /* check if a new primal space has been introduced */
2189   pcbddc->new_primal_space_local = PETSC_TRUE;
2190   if (olocal_primal_size == pcbddc->local_primal_size) {
2191     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2192     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2193     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2194   }
2195   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2196   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2197 
2198   /* flush dbg viewer */
2199   if (pcbddc->dbg_flag) {
2200     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2201   }
2202 
2203   /* free workspace */
2204   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2205   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2206   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2207   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2208   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2209   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2210   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2211   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 #undef __FUNCT__
2216 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2217 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2218 {
2219   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2220   PC_IS       *pcis = (PC_IS*)pc->data;
2221   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2222   PetscInt    ierr,i,vertex_size;
2223   PetscViewer viewer=pcbddc->dbg_viewer;
2224 
2225   PetscFunctionBegin;
2226   /* Reset previously computed graph */
2227   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2228   /* Init local Graph struct */
2229   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2230 
2231   /* Check validity of the csr graph passed in by the user */
2232   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2233     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2234   }
2235 
2236   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2237   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2238     Mat mat_adj;
2239     const PetscInt *xadj,*adjncy;
2240     PetscBool flg_row=PETSC_TRUE;
2241 
2242     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2243     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2244     if (!flg_row) {
2245       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2246     }
2247     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2248     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2249     if (!flg_row) {
2250       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2251     }
2252     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2253   }
2254 
2255   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2256   vertex_size = 1;
2257   if (pcbddc->user_provided_isfordofs) {
2258     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2259       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2260       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2261         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2262         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2263       }
2264       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2265       pcbddc->n_ISForDofs = 0;
2266       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2267     }
2268     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2269     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2270   } else {
2271     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2272       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2273       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2274       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2275         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2276       }
2277     }
2278   }
2279 
2280   /* Setup of Graph */
2281   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2282     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2283   }
2284   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2285     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2286   }
2287   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2288 
2289   /* Graph's connected components analysis */
2290   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2291 
2292   /* print some info to stdout */
2293   if (pcbddc->dbg_flag) {
2294     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2295   }
2296 
2297   /* mark topography has done */
2298   pcbddc->recompute_topography = PETSC_FALSE;
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2304 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2305 {
2306   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2307   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2308   PetscErrorCode ierr;
2309 
2310   PetscFunctionBegin;
2311   n = 0;
2312   vertices = 0;
2313   if (pcbddc->ConstraintMatrix) {
2314     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2315     for (i=0;i<local_primal_size;i++) {
2316       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2317       if (size_of_constraint == 1) n++;
2318       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2319     }
2320     if (vertices_idx) {
2321       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2322       n = 0;
2323       for (i=0;i<local_primal_size;i++) {
2324         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2325         if (size_of_constraint == 1) {
2326           vertices[n++]=row_cmat_indices[0];
2327         }
2328         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2329       }
2330     }
2331   }
2332   *n_vertices = n;
2333   if (vertices_idx) *vertices_idx = vertices;
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2339 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2340 {
2341   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2342   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2343   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2344   PetscBT        touched;
2345   PetscErrorCode ierr;
2346 
2347     /* This function assumes that the number of local constraints per connected component
2348        is not greater than the number of nodes defined for the connected component
2349        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2350   PetscFunctionBegin;
2351   n = 0;
2352   constraints_index = 0;
2353   if (pcbddc->ConstraintMatrix) {
2354     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2355     max_size_of_constraint = 0;
2356     for (i=0;i<local_primal_size;i++) {
2357       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2358       if (size_of_constraint > 1) {
2359         n++;
2360       }
2361       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2362       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2363     }
2364     if (constraints_idx) {
2365       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2366       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2367       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2368       n = 0;
2369       for (i=0;i<local_primal_size;i++) {
2370         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2371         if (size_of_constraint > 1) {
2372           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2373           /* find first untouched local node */
2374           j = 0;
2375           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2376           min_index = row_cmat_global_indices[j];
2377           min_loc = j;
2378           /* search the minimum among nodes not yet touched on the connected component
2379              since there can be more than one constraint on a single cc */
2380           for (j=1;j<size_of_constraint;j++) {
2381             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2382               min_index = row_cmat_global_indices[j];
2383               min_loc = j;
2384             }
2385           }
2386           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2387           constraints_index[n++] = row_cmat_indices[min_loc];
2388         }
2389         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2390       }
2391       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2392       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2393     }
2394   }
2395   *n_constraints = n;
2396   if (constraints_idx) *constraints_idx = constraints_index;
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 /* the next two functions has been adapted from pcis.c */
2401 #undef __FUNCT__
2402 #define __FUNCT__ "PCBDDCApplySchur"
2403 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2404 {
2405   PetscErrorCode ierr;
2406   PC_IS          *pcis = (PC_IS*)(pc->data);
2407 
2408   PetscFunctionBegin;
2409   if (!vec2_B) { vec2_B = v; }
2410   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2411   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2412   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2413   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2414   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2420 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2421 {
2422   PetscErrorCode ierr;
2423   PC_IS          *pcis = (PC_IS*)(pc->data);
2424 
2425   PetscFunctionBegin;
2426   if (!vec2_B) { vec2_B = v; }
2427   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2428   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2429   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2430   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2431   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "PCBDDCSubsetNumbering"
2437 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2438 {
2439   Vec            local_vec,global_vec;
2440   IS             seqis,paris;
2441   VecScatter     scatter_ctx;
2442   PetscScalar    *array;
2443   PetscInt       *temp_global_dofs;
2444   PetscScalar    globalsum;
2445   PetscInt       i,j,s;
2446   PetscInt       nlocals,first_index,old_index,max_local;
2447   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2448   PetscMPIInt    *dof_sizes,*dof_displs;
2449   PetscBool      first_found;
2450   PetscErrorCode ierr;
2451 
2452   PetscFunctionBegin;
2453   /* mpi buffers */
2454   MPI_Comm_size(comm,&size_prec_comm);
2455   MPI_Comm_rank(comm,&rank_prec_comm);
2456   j = ( !rank_prec_comm ? size_prec_comm : 0);
2457   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2458   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2459   /* get maximum size of subset */
2460   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2461   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2462   max_local = 0;
2463   if (n_local_dofs) {
2464     max_local = temp_global_dofs[0];
2465     for (i=1;i<n_local_dofs;i++) {
2466       if (max_local < temp_global_dofs[i] ) {
2467         max_local = temp_global_dofs[i];
2468       }
2469     }
2470   }
2471   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2472   max_global++;
2473   max_local = 0;
2474   if (n_local_dofs) {
2475     max_local = local_dofs[0];
2476     for (i=1;i<n_local_dofs;i++) {
2477       if (max_local < local_dofs[i] ) {
2478         max_local = local_dofs[i];
2479       }
2480     }
2481   }
2482   max_local++;
2483   /* allocate workspace */
2484   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2485   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2486   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2487   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2488   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2489   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2490   /* create scatter */
2491   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2492   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2493   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2494   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2495   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2496   /* init array */
2497   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2498   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2499   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2500   if (local_dofs_mult) {
2501     for (i=0;i<n_local_dofs;i++) {
2502       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2503     }
2504   } else {
2505     for (i=0;i<n_local_dofs;i++) {
2506       array[local_dofs[i]]=1.0;
2507     }
2508   }
2509   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2510   /* scatter into global vec and get total number of global dofs */
2511   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2512   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2513   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2514   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2515   /* Fill global_vec with cumulative function for global numbering */
2516   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2517   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2518   nlocals = 0;
2519   first_index = -1;
2520   first_found = PETSC_FALSE;
2521   for (i=0;i<s;i++) {
2522     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2523       first_found = PETSC_TRUE;
2524       first_index = i;
2525     }
2526     nlocals += (PetscInt)PetscRealPart(array[i]);
2527   }
2528   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2529   if (!rank_prec_comm) {
2530     dof_displs[0]=0;
2531     for (i=1;i<size_prec_comm;i++) {
2532       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2533     }
2534   }
2535   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2536   if (first_found) {
2537     array[first_index] += (PetscScalar)nlocals;
2538     old_index = first_index;
2539     for (i=first_index+1;i<s;i++) {
2540       if (PetscRealPart(array[i]) > 0.0) {
2541         array[i] += array[old_index];
2542         old_index = i;
2543       }
2544     }
2545   }
2546   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2547   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2548   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2549   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2550   /* get global ordering of local dofs */
2551   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2552   if (local_dofs_mult) {
2553     for (i=0;i<n_local_dofs;i++) {
2554       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2555     }
2556   } else {
2557     for (i=0;i<n_local_dofs;i++) {
2558       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2559     }
2560   }
2561   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2562   /* free workspace */
2563   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2564   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2565   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2566   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2567   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2568   /* return pointer to global ordering of local dofs */
2569   *global_numbering_subset = temp_global_dofs;
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #undef __FUNCT__
2574 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2575 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2576 {
2577   PetscInt       i,j;
2578   PetscScalar    *alphas;
2579   PetscErrorCode ierr;
2580 
2581   PetscFunctionBegin;
2582   /* this implements stabilized Gram-Schmidt */
2583   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2584   for (i=0;i<n;i++) {
2585     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2586     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2587     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2588   }
2589   ierr = PetscFree(alphas);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 /* TODO
2594    - now preallocation is done assuming SEQDENSE local matrices
2595 */
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatISGetMPIXAIJ"
2598 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2599 {
2600   Mat                    new_mat;
2601   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2602   /* info on mat */
2603   /* ISLocalToGlobalMapping rmapping,cmapping; */
2604   PetscInt               bs,rows,cols;
2605   PetscInt               lrows,lcols;
2606   PetscInt               local_rows,local_cols;
2607   PetscBool              isdense;
2608   /* values insertion */
2609   PetscScalar            *array;
2610   PetscInt               *local_indices,*global_indices;
2611   /* work */
2612   PetscInt               i,j,index_row;
2613   PetscErrorCode         ierr;
2614 
2615   PetscFunctionBegin;
2616   /* MISSING CHECKS
2617     - rectangular case not covered (it is not allowed by MATIS)
2618   */
2619   /* get info from mat */
2620   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2621   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2622   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2623   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2624 
2625   /* work */
2626   ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr);
2627   for (i=0;i<local_rows;i++) local_indices[i]=i;
2628   /* map indices of local mat to global values */
2629   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2630   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2631   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2632 
2633   if (reuse==MAT_INITIAL_MATRIX) {
2634     Vec         vec_dnz,vec_onz;
2635     PetscScalar *my_dnz,*my_onz;
2636     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2637     PetscInt    index_col,owner;
2638     PetscMPIInt nsubdomains;
2639 
2640     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2641     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2642     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2643     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2644     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2645     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2646     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2647 
2648     /*
2649       preallocation
2650     */
2651 
2652     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2653     /*
2654        Some vectors are needed to sum up properly on shared interface dofs.
2655        Preallocation macros cannot do the job.
2656        Note that preallocation is not exact, since it overestimates nonzeros
2657     */
2658     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2659     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2660     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2661     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2662     /* All processes need to compute entire row ownership */
2663     ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr);
2664     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2665     for (i=0;i<nsubdomains;i++) {
2666       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2667         row_ownership[j]=i;
2668       }
2669     }
2670 
2671     /*
2672        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2673        then, they will be summed up properly. This way, preallocation is always sufficient
2674     */
2675     ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr);
2676     ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr);
2677     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2678     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2679     for (i=0;i<local_rows;i++) {
2680       index_row = global_indices[i];
2681       for (j=i;j<local_rows;j++) {
2682         owner = row_ownership[index_row];
2683         index_col = global_indices[j];
2684         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2685           my_dnz[i] += 1.0;
2686         } else { /* offdiag block */
2687           my_onz[i] += 1.0;
2688         }
2689         /* same as before, interchanging rows and cols */
2690         if (i != j) {
2691           owner = row_ownership[index_col];
2692           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2693             my_dnz[j] += 1.0;
2694           } else {
2695             my_onz[j] += 1.0;
2696           }
2697         }
2698       }
2699     }
2700     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2701     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2702     if (local_rows) { /* multilevel guard */
2703       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2704       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2705     }
2706     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2707     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2708     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2709     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2710     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2711     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2712     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2713 
2714     /* set computed preallocation in dnz and onz */
2715     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2716     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2717     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2718     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2719     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2720     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2721     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2722     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2723 
2724     /* Resize preallocation if overestimated */
2725     for (i=0;i<lrows;i++) {
2726       dnz[i] = PetscMin(dnz[i],lcols);
2727       onz[i] = PetscMin(onz[i],cols-lcols);
2728     }
2729     /* set preallocation */
2730     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2731     for (i=0;i<lrows/bs;i++) {
2732       dnz[i] = dnz[i*bs]/bs;
2733       onz[i] = onz[i*bs]/bs;
2734     }
2735     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2736     for (i=0;i<lrows/bs;i++) {
2737       dnz[i] = dnz[i]-i;
2738     }
2739     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2740     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2741     *M = new_mat;
2742   } else {
2743     PetscInt mbs,mrows,mcols;
2744     /* some checks */
2745     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2746     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2747     if (mrows != rows) {
2748       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2749     }
2750     if (mrows != rows) {
2751       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2752     }
2753     if (mbs != bs) {
2754       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2755     }
2756     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2757   }
2758   /* set local to global mappings */
2759   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2760   /* Set values */
2761   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2762   if (isdense) { /* special case for dense local matrices */
2763     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2764     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2765     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2766     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2767     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2768     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2769   } else { /* very basic values insertion for all other matrix types */
2770     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2771     for (i=0;i<local_rows;i++) {
2772       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2773       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2774       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2775       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2776       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2777       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2778     }
2779     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2780   }
2781   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2783   if (isdense) {
2784     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2785   }
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatISGetSubassemblingPattern"
2791 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2792 {
2793   Mat             subdomain_adj;
2794   IS              new_ranks,ranks_send_to;
2795   MatPartitioning partitioner;
2796   Mat_IS          *matis;
2797   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2798   PetscInt        prank;
2799   PetscMPIInt     size,rank,color;
2800   PetscInt        *xadj,*adjncy,*oldranks;
2801   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2802   PetscInt        i,j,local_size,threshold=0;
2803   PetscErrorCode  ierr;
2804   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2805   PetscSubcomm    subcomm;
2806 
2807   PetscFunctionBegin;
2808   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2809   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2810   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2811 
2812   /* Get info on mapping */
2813   matis = (Mat_IS*)(mat->data);
2814   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2815   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2816 
2817   /* build local CSR graph of subdomains' connectivity */
2818   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2819   xadj[0] = 0;
2820   xadj[1] = PetscMax(n_neighs-1,0);
2821   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2822   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2823 
2824   if (threshold) {
2825     PetscInt* count,min_threshold;
2826     ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr);
2827     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2828     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2829       for (j=0;j<n_shared[i];j++) {
2830         count[shared[i][j]] += 1;
2831       }
2832     }
2833     /* adapt threshold since we dont want to lose significant connections */
2834     min_threshold = n_neighs;
2835     for (i=1;i<n_neighs;i++) {
2836       for (j=0;j<n_shared[i];j++) {
2837         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2838       }
2839     }
2840     threshold = PetscMax(min_threshold+1,threshold);
2841     xadj[1] = 0;
2842     for (i=1;i<n_neighs;i++) {
2843       for (j=0;j<n_shared[i];j++) {
2844         if (count[shared[i][j]] < threshold) {
2845           adjncy[xadj[1]] = neighs[i];
2846           adjncy_wgt[xadj[1]] = n_shared[i];
2847           xadj[1]++;
2848           break;
2849         }
2850       }
2851     }
2852     ierr = PetscFree(count);CHKERRQ(ierr);
2853   } else {
2854     if (xadj[1]) {
2855       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2856       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2857     }
2858   }
2859   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2860   if (use_square) {
2861     for (i=0;i<xadj[1];i++) {
2862       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2863     }
2864   }
2865   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2866 
2867   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2868 
2869   /*
2870     Restrict work on active processes only.
2871   */
2872   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2873   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2874   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2875   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2876   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2877   if (color) {
2878     ierr = PetscFree(xadj);CHKERRQ(ierr);
2879     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2880     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2881   } else {
2882     PetscInt coarsening_ratio;
2883     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2884     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2885     prank = rank;
2886     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2887     /*
2888     for (i=0;i<size;i++) {
2889       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2890     }
2891     */
2892     for (i=0;i<xadj[1];i++) {
2893       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2894     }
2895     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2896     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2897     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2898 
2899     /* Partition */
2900     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2901     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2902     if (use_vwgt) {
2903       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2904       v_wgt[0] = local_size;
2905       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2906     }
2907     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2908     coarsening_ratio = size/n_subdomains;
2909     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2910     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2911     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2912     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2913     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2914 
2915     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2916     if (contiguous) {
2917       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2918     } else {
2919       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2920     }
2921     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2922     /* clean up */
2923     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2924     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2925     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2926     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2927   }
2928   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2929 
2930   /* assemble parallel IS for sends */
2931   i = 1;
2932   if (color) i=0;
2933   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2934 
2935   /* get back IS */
2936   *is_sends = ranks_send_to;
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2941 
2942 #undef __FUNCT__
2943 #define __FUNCT__ "MatISSubassemble"
2944 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2945 {
2946   Mat                    local_mat;
2947   Mat_IS                 *matis;
2948   IS                     is_sends_internal;
2949   PetscInt               rows,cols;
2950   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2951   PetscBool              ismatis,isdense,destroy_mat;
2952   ISLocalToGlobalMapping l2gmap;
2953   PetscInt*              l2gmap_indices;
2954   const PetscInt*        is_indices;
2955   MatType                new_local_type;
2956   /* buffers */
2957   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2958   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2959   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2960   /* MPI */
2961   MPI_Comm               comm,comm_n;
2962   PetscSubcomm           subcomm;
2963   PetscMPIInt            n_sends,n_recvs,commsize;
2964   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2965   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2966   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2967   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2968   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2969   PetscErrorCode         ierr;
2970 
2971   PetscFunctionBegin;
2972   /* TODO: add missing checks */
2973   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2974   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2975   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2976   PetscValidLogicalCollectiveInt(mat,nis,7);
2977   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2978   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2979   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2980   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2981   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2982   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2983   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2984   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2985     PetscInt mrows,mcols,mnrows,mncols;
2986     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2987     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2988     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2989     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2990     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2991     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2992   }
2993   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2994   PetscValidLogicalCollectiveInt(mat,bs,0);
2995   /* prepare IS for sending if not provided */
2996   if (!is_sends) {
2997     PetscBool pcontig = PETSC_TRUE;
2998     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
2999     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3000   } else {
3001     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3002     is_sends_internal = is_sends;
3003   }
3004 
3005   /* get pointer of MATIS data */
3006   matis = (Mat_IS*)mat->data;
3007 
3008   /* get comm */
3009   comm = PetscObjectComm((PetscObject)mat);
3010 
3011   /* compute number of sends */
3012   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3013   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3014 
3015   /* compute number of receives */
3016   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3017   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3018   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3019   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3020   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3021   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3022   ierr = PetscFree(iflags);CHKERRQ(ierr);
3023 
3024   /* restrict comm if requested */
3025   subcomm = 0;
3026   destroy_mat = PETSC_FALSE;
3027   if (restrict_comm) {
3028     PetscMPIInt color,rank,subcommsize;
3029     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3030     color = 0;
3031     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3032     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3033     subcommsize = commsize - subcommsize;
3034     /* check if reuse has been requested */
3035     if (reuse == MAT_REUSE_MATRIX) {
3036       if (*mat_n) {
3037         PetscMPIInt subcommsize2;
3038         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3039         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3040         comm_n = PetscObjectComm((PetscObject)*mat_n);
3041       } else {
3042         comm_n = PETSC_COMM_SELF;
3043       }
3044     } else { /* MAT_INITIAL_MATRIX */
3045       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3046       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3047       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3048       comm_n = subcomm->comm;
3049     }
3050     /* flag to destroy *mat_n if not significative */
3051     if (color) destroy_mat = PETSC_TRUE;
3052   } else {
3053     comm_n = comm;
3054   }
3055 
3056   /* prepare send/receive buffers */
3057   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3058   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3059   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3060   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3061   if (nis) {
3062     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3063     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3064   }
3065 
3066   /* Get data from local matrices */
3067   if (!isdense) {
3068     /* TODO: See below some guidelines on how to prepare the local buffers */
3069     /*
3070        send_buffer_vals should contain the raw values of the local matrix
3071        send_buffer_idxs should contain:
3072        - MatType_PRIVATE type
3073        - PetscInt        size_of_l2gmap
3074        - PetscInt        global_row_indices[size_of_l2gmap]
3075        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3076     */
3077   } else {
3078     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3079     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3080     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
3081     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3082     send_buffer_idxs[1] = i;
3083     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3084     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3085     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3086     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3087     for (i=0;i<n_sends;i++) {
3088       ilengths_vals[is_indices[i]] = len*len;
3089       ilengths_idxs[is_indices[i]] = len+2;
3090     }
3091   }
3092   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3093   /* additional is (if any) */
3094   if (nis) {
3095     PetscMPIInt psum;
3096     PetscInt j;
3097     for (j=0,psum=0;j<nis;j++) {
3098       PetscInt plen;
3099       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3100       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3101       psum += len+1; /* indices + lenght */
3102     }
3103     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3104     for (j=0,psum=0;j<nis;j++) {
3105       PetscInt plen;
3106       const PetscInt *is_array_idxs;
3107       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3108       send_buffer_idxs_is[psum] = plen;
3109       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3110       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3111       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3112       psum += plen+1; /* indices + lenght */
3113     }
3114     for (i=0;i<n_sends;i++) {
3115       ilengths_idxs_is[is_indices[i]] = psum;
3116     }
3117     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3118   }
3119 
3120   buf_size_idxs = 0;
3121   buf_size_vals = 0;
3122   buf_size_idxs_is = 0;
3123   for (i=0;i<n_recvs;i++) {
3124     buf_size_idxs += (PetscInt)olengths_idxs[i];
3125     buf_size_vals += (PetscInt)olengths_vals[i];
3126     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3127   }
3128   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3129   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3130   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3131 
3132   /* get new tags for clean communications */
3133   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3134   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3135   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3136 
3137   /* allocate for requests */
3138   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3139   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3140   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3141   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3142   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3143   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3144 
3145   /* communications */
3146   ptr_idxs = recv_buffer_idxs;
3147   ptr_vals = recv_buffer_vals;
3148   ptr_idxs_is = recv_buffer_idxs_is;
3149   for (i=0;i<n_recvs;i++) {
3150     source_dest = onodes[i];
3151     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3152     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3153     ptr_idxs += olengths_idxs[i];
3154     ptr_vals += olengths_vals[i];
3155     if (nis) {
3156       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3157       ptr_idxs_is += olengths_idxs_is[i];
3158     }
3159   }
3160   for (i=0;i<n_sends;i++) {
3161     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3162     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3163     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3164     if (nis) {
3165       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3166     }
3167   }
3168   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3169   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3170 
3171   /* assemble new l2g map */
3172   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3173   ptr_idxs = recv_buffer_idxs;
3174   buf_size_idxs = 0;
3175   for (i=0;i<n_recvs;i++) {
3176     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3177     ptr_idxs += olengths_idxs[i];
3178   }
3179   ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr);
3180   ptr_idxs = recv_buffer_idxs;
3181   buf_size_idxs = 0;
3182   for (i=0;i<n_recvs;i++) {
3183     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3184     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3185     ptr_idxs += olengths_idxs[i];
3186   }
3187   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3188   ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3189   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3190 
3191   /* infer new local matrix type from received local matrices type */
3192   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3193   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3194   if (n_recvs) {
3195     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3196     ptr_idxs = recv_buffer_idxs;
3197     for (i=0;i<n_recvs;i++) {
3198       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3199         new_local_type_private = MATAIJ_PRIVATE;
3200         break;
3201       }
3202       ptr_idxs += olengths_idxs[i];
3203     }
3204     switch (new_local_type_private) {
3205       case MATDENSE_PRIVATE:
3206         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3207           new_local_type = MATSEQAIJ;
3208           bs = 1;
3209         } else { /* if I receive only 1 dense matrix */
3210           new_local_type = MATSEQDENSE;
3211           bs = 1;
3212         }
3213         break;
3214       case MATAIJ_PRIVATE:
3215         new_local_type = MATSEQAIJ;
3216         bs = 1;
3217         break;
3218       case MATBAIJ_PRIVATE:
3219         new_local_type = MATSEQBAIJ;
3220         break;
3221       case MATSBAIJ_PRIVATE:
3222         new_local_type = MATSEQSBAIJ;
3223         break;
3224       default:
3225         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3226         break;
3227     }
3228   } else { /* by default, new_local_type is seqdense */
3229     new_local_type = MATSEQDENSE;
3230     bs = 1;
3231   }
3232 
3233   /* create MATIS object if needed */
3234   if (reuse == MAT_INITIAL_MATRIX) {
3235     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3236     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3237   } else {
3238     /* it also destroys the local matrices */
3239     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3240   }
3241   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3242   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3243   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3244   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3245 
3246   /* set values */
3247   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3248   ptr_vals = recv_buffer_vals;
3249   ptr_idxs = recv_buffer_idxs;
3250   for (i=0;i<n_recvs;i++) {
3251     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3252       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3253       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3254       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3255       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3256       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3257     } else {
3258       /* TODO */
3259     }
3260     ptr_idxs += olengths_idxs[i];
3261     ptr_vals += olengths_vals[i];
3262   }
3263   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3266   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3267 
3268 #if 0
3269   if (!restrict_comm) { /* check */
3270     Vec       lvec,rvec;
3271     PetscReal infty_error;
3272 
3273     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3274     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3275     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3276     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3277     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3278     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3279     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3280     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3281     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3282   }
3283 #endif
3284 
3285   /* assemble new additional is (if any) */
3286   if (nis) {
3287     PetscInt **temp_idxs,*count_is,j,psum;
3288 
3289     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3290     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3291     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3292     ptr_idxs = recv_buffer_idxs_is;
3293     psum = 0;
3294     for (i=0;i<n_recvs;i++) {
3295       for (j=0;j<nis;j++) {
3296         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3297         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3298         psum += plen;
3299         ptr_idxs += plen+1; /* shift pointer to received data */
3300       }
3301     }
3302     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3303     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3304     for (i=1;i<nis;i++) {
3305       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3306     }
3307     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3308     ptr_idxs = recv_buffer_idxs_is;
3309     for (i=0;i<n_recvs;i++) {
3310       for (j=0;j<nis;j++) {
3311         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3312         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3313         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3314         ptr_idxs += plen+1; /* shift pointer to received data */
3315       }
3316     }
3317     for (i=0;i<nis;i++) {
3318       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3319       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3320       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3321     }
3322     ierr = PetscFree(count_is);CHKERRQ(ierr);
3323     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3324     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3325   }
3326   /* free workspace */
3327   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3328   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3329   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3330   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3331   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3332   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3333   if (isdense) {
3334     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3335     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3336   } else {
3337     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3338   }
3339   if (nis) {
3340     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3341     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3342   }
3343   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3344   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3345   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3346   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3347   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3348   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3349   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3350   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3351   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3352   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3353   ierr = PetscFree(onodes);CHKERRQ(ierr);
3354   if (nis) {
3355     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3356     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3357     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3358   }
3359   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3360   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3361     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3362     for (i=0;i<nis;i++) {
3363       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3364     }
3365   }
3366   PetscFunctionReturn(0);
3367 }
3368 
3369 /* temporary hack into ksp private data structure */
3370 #include <petsc-private/kspimpl.h>
3371 
3372 #undef __FUNCT__
3373 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3374 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3375 {
3376   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3377   PC_IS                  *pcis = (PC_IS*)pc->data;
3378   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3379   MatNullSpace           CoarseNullSpace=NULL;
3380   ISLocalToGlobalMapping coarse_islg;
3381   IS                     coarse_is,*isarray;
3382   PetscInt               i,im_active=-1,active_procs=-1;
3383   PetscInt               nis,nisdofs,nisneu;
3384   PC                     pc_temp;
3385   PCType                 coarse_pc_type;
3386   KSPType                coarse_ksp_type;
3387   PetscBool              multilevel_requested,multilevel_allowed;
3388   PetscBool              setsym,issym,isherm,isbddc,isnn,coarse_reuse;
3389   Mat                    t_coarse_mat_is;
3390   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3391   PetscMPIInt            all_procs;
3392   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3393   PetscBool              compute_vecs = PETSC_FALSE;
3394   PetscErrorCode         ierr;
3395 
3396   PetscFunctionBegin;
3397   /* Assign global numbering to coarse dofs */
3398   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3399     compute_vecs = PETSC_TRUE;
3400     PetscInt ocoarse_size;
3401     ocoarse_size = pcbddc->coarse_size;
3402     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3403     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3404     /* see if we can avoid some work */
3405     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3406       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3407         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3408         coarse_reuse = PETSC_FALSE;
3409       } else { /* we can safely reuse already computed coarse matrix */
3410         coarse_reuse = PETSC_TRUE;
3411       }
3412     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3413       coarse_reuse = PETSC_FALSE;
3414     }
3415     /* reset any subassembling information */
3416     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3417     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3418   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3419     coarse_reuse = PETSC_TRUE;
3420   }
3421 
3422   /* count "active" (i.e. with positive local size) and "void" processes */
3423   im_active = !!(pcis->n);
3424   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3425   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3426   void_procs = all_procs-active_procs;
3427   csin_type_simple = PETSC_TRUE;
3428   if (pcbddc->current_level) {
3429     csin_ml = PETSC_TRUE;
3430     ncoarse_ml = void_procs;
3431     csin_ds = PETSC_TRUE;
3432     ncoarse_ds = void_procs;
3433     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3434   } else {
3435     csin_ml = PETSC_FALSE;
3436     ncoarse_ml = all_procs;
3437     if (void_procs) {
3438       csin_ds = PETSC_TRUE;
3439       ncoarse_ds = void_procs;
3440       csin_type_simple = PETSC_FALSE;
3441     } else {
3442       csin_ds = PETSC_FALSE;
3443       ncoarse_ds = all_procs;
3444     }
3445   }
3446 
3447   /*
3448     test if we can go multilevel: three conditions must be satisfied:
3449     - we have not exceeded the number of levels requested
3450     - we can actually subassemble the active processes
3451     - we can find a suitable number of MPI processes where we can place the subassembled problem
3452   */
3453   multilevel_allowed = PETSC_FALSE;
3454   multilevel_requested = PETSC_FALSE;
3455   if (pcbddc->current_level < pcbddc->max_levels) {
3456     multilevel_requested = PETSC_TRUE;
3457     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3458       multilevel_allowed = PETSC_FALSE;
3459     } else {
3460       multilevel_allowed = PETSC_TRUE;
3461     }
3462   }
3463   /* determine number of process partecipating to coarse solver */
3464   if (multilevel_allowed) {
3465     ncoarse = ncoarse_ml;
3466     csin = csin_ml;
3467   } else {
3468     ncoarse = ncoarse_ds;
3469     csin = csin_ds;
3470   }
3471 
3472   /* creates temporary l2gmap and IS for coarse indexes */
3473   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3474   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3475 
3476   /* creates temporary MATIS object for coarse matrix */
3477   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3478   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3479   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3480   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3481   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3482   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3483 
3484   /* compute dofs splitting and neumann boundaries for coarse dofs */
3485   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3486     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3487     const PetscInt         *idxs;
3488     ISLocalToGlobalMapping tmap;
3489 
3490     /* create map between primal indices (in local representative ordering) and local primal numbering */
3491     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3492     /* allocate space for temporary storage */
3493     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3494     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3495     /* allocate for IS array */
3496     nisdofs = pcbddc->n_ISForDofsLocal;
3497     nisneu = !!pcbddc->NeumannBoundariesLocal;
3498     nis = nisdofs + nisneu;
3499     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3500     /* dofs splitting */
3501     for (i=0;i<nisdofs;i++) {
3502       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3503       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3504       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3505       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3506       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3507       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3508       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3509       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3510     }
3511     /* neumann boundaries */
3512     if (pcbddc->NeumannBoundariesLocal) {
3513       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3514       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3515       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3516       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3517       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3518       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3519       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3520       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3521     }
3522     /* free memory */
3523     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3524     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3525     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3526   } else {
3527     nis = 0;
3528     nisdofs = 0;
3529     nisneu = 0;
3530     isarray = NULL;
3531   }
3532   /* destroy no longer needed map */
3533   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3534 
3535   /* restrict on coarse candidates (if needed) */
3536   coarse_mat_is = NULL;
3537   if (csin) {
3538     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3539       PetscInt j,tissize,*nisindices;
3540       PetscInt *coarse_candidates;
3541       const PetscInt* tisindices;
3542       /* get coarse candidates' ranks in pc communicator */
3543       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3544       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3545       for (i=0,j=0;i<all_procs;i++) {
3546         if (!coarse_candidates[i]) {
3547           coarse_candidates[j]=i;
3548           j++;
3549         }
3550       }
3551       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3552       /* get a suitable subassembling pattern */
3553       if (csin_type_simple) {
3554         PetscMPIInt rank;
3555         PetscInt    issize,isidx;
3556         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3557         if (im_active) {
3558           issize = 1;
3559           isidx = (PetscInt)rank;
3560         } else {
3561           issize = 0;
3562           isidx = -1;
3563         }
3564         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3565       } else {
3566         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3567       }
3568       if (pcbddc->dbg_flag) {
3569         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3570         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3571         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3572         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3573         for (i=0;i<j;i++) {
3574           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3575         }
3576         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3577         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3578       }
3579       /* shift the pattern on coarse candidates */
3580       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3581       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3582       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3583       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3584       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3585       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3586       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3587     }
3588     if (pcbddc->dbg_flag) {
3589       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3590       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3591       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3592       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3593     }
3594     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3595     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3596   } else {
3597     if (pcbddc->dbg_flag) {
3598       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3599       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3600       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3601     }
3602     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3603     coarse_mat_is = t_coarse_mat_is;
3604   }
3605 
3606   /* create local to global scatters for coarse problem */
3607   if (compute_vecs) {
3608     PetscInt lrows;
3609     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3610     if (coarse_mat_is) {
3611       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3612     } else {
3613       lrows = 0;
3614     }
3615     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3616     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3617     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3618     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3619     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3620   }
3621   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3622   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3623 
3624   /* set defaults for coarse KSP and PC */
3625   if (multilevel_allowed) {
3626     coarse_ksp_type = KSPRICHARDSON;
3627     coarse_pc_type = PCBDDC;
3628   } else {
3629     coarse_ksp_type = KSPPREONLY;
3630     coarse_pc_type = PCREDUNDANT;
3631   }
3632 
3633   /* print some info if requested */
3634   if (pcbddc->dbg_flag) {
3635     if (!multilevel_allowed) {
3636       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3637       if (multilevel_requested) {
3638         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3639       } else if (pcbddc->max_levels) {
3640         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3641       }
3642       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3643     }
3644   }
3645 
3646   /* create the coarse KSP object only once with defaults */
3647   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3648   issym = PETSC_FALSE;
3649   isherm = PETSC_FALSE;
3650   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3651   ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr);
3652   if (coarse_mat_is) {
3653     MatReuse coarse_mat_reuse;
3654     PetscViewer dbg_viewer;
3655     if (pcbddc->dbg_flag) {
3656       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3657       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3658     }
3659     if (!pcbddc->coarse_ksp) {
3660       char prefix[256],str_level[16];
3661       size_t len;
3662       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3663       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3664       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3665       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr);
3666       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3667       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3668       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3669       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3670       /* prefix */
3671       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3672       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3673       if (!pcbddc->current_level) {
3674         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3675         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3676       } else {
3677         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3678         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3679         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3680         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3681         *(prefix+len)='\0';
3682         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3683         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3684       }
3685       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3686       /* allow user customization */
3687       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3688     }
3689 
3690     /* get some info after set from options */
3691     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3692     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3693     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3694     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3695       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3696       isbddc = PETSC_FALSE;
3697     }
3698 
3699     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3700     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3701     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3702     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3703     if (nisdofs) {
3704       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3705       for (i=0;i<nisdofs;i++) {
3706         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3707       }
3708     }
3709     if (nisneu) {
3710       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3711       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3712     }
3713 
3714     /* assemble coarse matrix */
3715     if (coarse_reuse) {
3716       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3717       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3718       coarse_mat_reuse = MAT_REUSE_MATRIX;
3719     } else {
3720       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3721     }
3722     if (isbddc || isnn) {
3723       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3724         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3725         if (pcbddc->dbg_flag) {
3726           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3727           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3728           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3729           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3730         }
3731       }
3732       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3733     } else {
3734       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3735     }
3736     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3737 
3738     /* propagate symmetry info to coarse matrix */
3739     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3740     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3741 
3742     /* set operators */
3743     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3744     if (pcbddc->dbg_flag) {
3745       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3746     }
3747   } else { /* processes non partecipating to coarse solver (if any) */
3748     coarse_mat = 0;
3749   }
3750   ierr = PetscFree(isarray);CHKERRQ(ierr);
3751 
3752   /* Compute coarse null space (special handling by BDDC only) */
3753   if (pcbddc->NullSpace) {
3754     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3755   }
3756 
3757   if (pcbddc->coarse_ksp) {
3758     Vec crhs,csol;
3759     PetscBool ispreonly;
3760     if (CoarseNullSpace) {
3761       if (isbddc) {
3762         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3763       } else {
3764         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3765       }
3766     }
3767     /* setup coarse ksp */
3768     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3769     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3770     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3771     /* hack */
3772     if (!csol) {
3773       ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3774     }
3775     if (!crhs) {
3776       ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3777     }
3778     /* Check coarse problem if in debug mode or if solving with an iterative method */
3779     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3780     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3781       KSP       check_ksp;
3782       KSPType   check_ksp_type;
3783       PC        check_pc;
3784       Vec       check_vec,coarse_vec;
3785       PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3786       PetscInt  its;
3787       PetscBool compute_eigs;
3788       PetscReal *eigs_r,*eigs_c;
3789       PetscInt  neigs;
3790 
3791       /* Create ksp object suitable for estimation of extreme eigenvalues */
3792       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3793       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3794       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3795       if (ispreonly) {
3796         check_ksp_type = KSPPREONLY;
3797         compute_eigs = PETSC_FALSE;
3798       } else {
3799         check_ksp_type = KSPGMRES;
3800         compute_eigs = PETSC_TRUE;
3801       }
3802       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3803       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3804       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3805       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3806       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3807       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3808       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3809       /* create random vec */
3810       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3811       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3812       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3813       if (CoarseNullSpace) {
3814         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3815       }
3816       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3817       /* solve coarse problem */
3818       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3819       if (CoarseNullSpace) {
3820         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3821       }
3822       /* set eigenvalue estimation if preonly has not been requested */
3823       if (compute_eigs) {
3824         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3825         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3826         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3827         lambda_max = eigs_r[neigs-1];
3828         lambda_min = eigs_r[0];
3829         if (pcbddc->use_coarse_estimates) {
3830           if (lambda_max>lambda_min) {
3831             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3832             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3833           }
3834         }
3835       }
3836 
3837       /* check coarse problem residual error */
3838       if (pcbddc->dbg_flag) {
3839         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3840         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3841         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3842         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3843         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3844         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3845         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3846         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3847         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3848         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3849         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3850         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3851         if (compute_eigs) {
3852           PetscReal lambda_max_s,lambda_min_s;
3853           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3854           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3855           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3856           for (i=0;i<neigs;i++) {
3857             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3858           }
3859         }
3860         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3861         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3862       }
3863       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3864       if (compute_eigs) {
3865         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3866         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3867       }
3868     }
3869   }
3870   /* print additional info */
3871   if (pcbddc->dbg_flag) {
3872     /* waits until all processes reaches this point */
3873     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3874     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3875     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3876   }
3877 
3878   /* free memory */
3879   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3880   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3881   PetscFunctionReturn(0);
3882 }
3883 
3884 #undef __FUNCT__
3885 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3886 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3887 {
3888   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3889   PC_IS*         pcis = (PC_IS*)pc->data;
3890   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3891   PetscInt       i,coarse_size;
3892   PetscInt       *local_primal_indices;
3893   PetscErrorCode ierr;
3894 
3895   PetscFunctionBegin;
3896   /* Compute global number of coarse dofs */
3897   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3898     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3899   }
3900   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3901 
3902   /* check numbering */
3903   if (pcbddc->dbg_flag) {
3904     PetscScalar coarsesum,*array;
3905 
3906     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3907     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3908     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3909     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3910     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3911     for (i=0;i<pcbddc->local_primal_size;i++) {
3912       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3913     }
3914     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3915     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3916     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3917     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3918     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3919     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3920     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3921     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3922     for (i=0;i<pcis->n;i++) {
3923       if (array[i] == 1.0) {
3924         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3925       }
3926     }
3927     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3928     for (i=0;i<pcis->n;i++) {
3929       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3930     }
3931     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3932     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3933     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3934     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3935     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3936     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3937     if (pcbddc->dbg_flag > 1) {
3938       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3939       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3940       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3941       for (i=0;i<pcbddc->local_primal_size;i++) {
3942         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3943       }
3944       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3945     }
3946     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3947   }
3948   /* get back data */
3949   *coarse_size_n = coarse_size;
3950   *local_primal_indices_n = local_primal_indices;
3951   PetscFunctionReturn(0);
3952 }
3953 
3954 #undef __FUNCT__
3955 #define __FUNCT__ "PCBDDCGlobalToLocal"
3956 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3957 {
3958   PC_IS*         pcis = (PC_IS*)pc->data;
3959   IS             localis_t;
3960   PetscInt       i,lsize,*idxs;
3961   PetscScalar    *vals;
3962   PetscErrorCode ierr;
3963 
3964   PetscFunctionBegin;
3965   /* get dirichlet indices in local ordering exploiting local to global map */
3966   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3967   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3968   for (i=0;i<lsize;i++) vals[i] = 1.0;
3969   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3970   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3971   if (idxs) { /* multilevel guard */
3972     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3973   }
3974   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3975   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3976   ierr = PetscFree(vals);CHKERRQ(ierr);
3977   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3978   /* now compute dirichlet set in local ordering */
3979   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3980   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3981   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3982   for (i=0,lsize=0;i<pcis->n;i++) {
3983     if (vals[i] == 1.0) {
3984       lsize++;
3985     }
3986   }
3987   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3988   for (i=0,lsize=0;i<pcis->n;i++) {
3989     if (vals[i] == 1.0) {
3990       idxs[lsize++] = i;
3991     }
3992   }
3993   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3994   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3995   *localis = localis_t;
3996   PetscFunctionReturn(0);
3997 }
3998