xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision d65f70fd938573fec9f4e0f30225958f4666553f)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #if 0
9 #undef __FUNCT__
10 #define __FUNCT__ "PCBDDCAdaptiveSelection"
11 PetscErrorCode PCBDDCAdaptiveSelection(PC pc, MatNullSpace* adapt_nnsp)
12 {
13   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs[0],pcbddc->mat_graph,...);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 #endif
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "PCBDDCSetUpSolvers"
24 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
25 {
26   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
27   PetscScalar    *coarse_submat_vals;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   /* Setup local scatters R_to_B and (optionally) R_to_D */
32   /* PCBDDCSetUpLocalWorkVectors should be called first! */
33   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
34 
35   /* Setup local neumann solver ksp_R */
36   /* PCBDDCSetUpLocalScatters should be called first! */
37   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
38 
39   /* Change global null space passed in by the user if change of basis has been requested */
40   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
41     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
42   }
43 
44   /*
45      Setup local correction and local part of coarse basis.
46      Gives back the dense local part of the coarse matrix in column major ordering
47   */
48   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
49 
50   /* Compute total number of coarse nodes and setup coarse solver */
51   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
52 
53   /* free */
54   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "PCBDDCResetCustomization"
60 PetscErrorCode PCBDDCResetCustomization(PC pc)
61 {
62   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
67   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
68   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
70   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
71   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
72   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
73   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
74   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
75   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
76   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetTopography"
82 PetscErrorCode PCBDDCResetTopography(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
89   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
91   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
92   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[0]);CHKERRQ(ierr);
93   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[1]);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNCT__
98 #define __FUNCT__ "PCBDDCResetSolvers"
99 PetscErrorCode PCBDDCResetSolvers(PC pc)
100 {
101   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
102   PetscErrorCode ierr;
103 
104   PetscFunctionBegin;
105   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
106   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
110   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
113   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
114   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
115   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
116   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
117   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
118   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
119   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
120   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
121   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
122   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
123   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
124   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
125   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
126   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
127   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
128   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
134 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
135 {
136   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
137   PC_IS          *pcis = (PC_IS*)pc->data;
138   VecType        impVecType;
139   PetscInt       n_constraints,n_R,old_size;
140   PetscErrorCode ierr;
141 
142   PetscFunctionBegin;
143   if (!pcbddc->ConstraintMatrix) {
144     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
145   }
146   /* get sizes */
147   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
148   n_R = pcis->n-pcbddc->n_actual_vertices;
149   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
150   /* local work vectors (try to avoid unneeded work)*/
151   /* R nodes */
152   old_size = -1;
153   if (pcbddc->vec1_R) {
154     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
155   }
156   if (n_R != old_size) {
157     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
158     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
159     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
160     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
161     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
162     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
163   }
164   /* local primal dofs */
165   old_size = -1;
166   if (pcbddc->vec1_P) {
167     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
168   }
169   if (pcbddc->local_primal_size != old_size) {
170     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
171     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
172     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
173     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
174   }
175   /* local explicit constraints */
176   old_size = -1;
177   if (pcbddc->vec1_C) {
178     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
179   }
180   if (n_constraints && n_constraints != old_size) {
181     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
182     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
183     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
184     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "PCBDDCSetUpCorrection"
191 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
192 {
193   PetscErrorCode         ierr;
194   /* pointers to pcis and pcbddc */
195   PC_IS*                 pcis = (PC_IS*)pc->data;
196   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
197   /* submatrices of local problem */
198   Mat                    A_RV,A_VR,A_VV;
199   /* working matrices */
200   Mat                    M1,M2,M3,C_CR;
201   /* working vectors */
202   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
203   /* additional working stuff */
204   IS                     is_aux;
205   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
206   const PetscScalar      *array,*row_cmat_values;
207   const PetscInt         *row_cmat_indices,*idx_R_local;
208   PetscInt               *idx_V_B,*auxindices;
209   PetscInt               n_vertices,n_constraints,size_of_constraint;
210   PetscInt               i,j,n_R,n_D,n_B;
211   PetscBool              unsymmetric_check;
212   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
213   MatType                impMatType;
214   /* some shortcuts to scalars */
215   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
216   /* for debugging purposes */
217   PetscReal              *coarsefunctions_errors,*constraints_errors;
218 
219   PetscFunctionBegin;
220   /* get number of vertices (corners plus constraints with change of basis)
221      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
222   n_vertices = pcbddc->n_actual_vertices;
223   n_constraints = pcbddc->local_primal_size-n_vertices;
224   /* Set Non-overlapping dimensions */
225   n_B = pcis->n_B; n_D = pcis->n - n_B;
226   n_R = pcis->n-n_vertices;
227 
228   /* Set types for local objects needed by BDDC precondtioner */
229   impMatType = MATSEQDENSE;
230 
231   /* Allocating some extra storage just to be safe */
232   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
233   for (i=0;i<pcis->n;i++) auxindices[i]=i;
234 
235   /* vertices in boundary numbering */
236   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
237   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
238   if (i != n_vertices) {
239     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
240   }
241 
242   /* Precompute stuffs needed for preprocessing and application of BDDC*/
243   if (n_constraints) {
244     /* see if we can save some allocations */
245     if (pcbddc->local_auxmat2) {
246       PetscInt on_R,on_constraints;
247       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
248       if (on_R != n_R || on_constraints != n_constraints) {
249         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
250         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
251       }
252     }
253     /* work vectors */
254     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
255     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
256     /* auxiliary matrices */
257     if (!pcbddc->local_auxmat2) {
258       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
259       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
260       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
261       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
262     }
263 
264     /* Extract constraints on R nodes: C_{CR}  */
265     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
266     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
267     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
268 
269     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
270     for (i=0;i<n_constraints;i++) {
271       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
272       /* Get row of constraint matrix in R numbering */
273       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
274       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
275       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
276       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
277       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
278       /* Solve for row of constraint matrix in R numbering */
279       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
280       /* Set values in local_auxmat2 */
281       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
282       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
283       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
284     }
285     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
286     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
287     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
288 
289     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
290     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
291     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
292     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
293     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
294     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
295     ierr = MatSetUp(M1);CHKERRQ(ierr);
296     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
297     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
298     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
299     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
300     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
301     ierr = MatDestroy(&M2);CHKERRQ(ierr);
302     ierr = MatDestroy(&M3);CHKERRQ(ierr);
303     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
304     if (!pcbddc->local_auxmat1) {
305       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
306     } else {
307       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
308     }
309   }
310 
311   /* Get submatrices from subdomain matrix */
312   if (n_vertices) {
313     PetscInt ibs,mbs;
314     PetscBool issbaij;
315     Mat newmat;
316 
317     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
318     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
319     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
320     if (ibs != mbs) { /* need to convert to SEQAIJ */
321       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
322       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
323       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
325       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
326     } else {
327       /* this is safe */
328       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
329       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
330       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
331         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
332         /* which of the two approaches is faster? */
333         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
334         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
335         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
336         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
337         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
338       } else {
339         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
340         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
341       }
342     }
343     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
344     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
345     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
346   }
347 
348   /* Matrix of coarse basis functions (local) */
349   if (pcbddc->coarse_phi_B) {
350     PetscInt on_B,on_primal;
351     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
352     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
353       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
354       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
355     }
356   }
357   if (pcbddc->coarse_phi_D) {
358     PetscInt on_D,on_primal;
359     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
360     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
361       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
362       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
363     }
364   }
365   if (!pcbddc->coarse_phi_B) {
366     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
367     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
368     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
369     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
370   }
371   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
372     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
373     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
374     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
375     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
376   }
377 
378   if (pcbddc->dbg_flag) {
379     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
380     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
381     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
382   }
383   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
384   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
385 
386   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
387 
388   /* vertices */
389   for (i=0;i<n_vertices;i++) {
390     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
391     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
392     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
393     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
394     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
395     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
396     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
397     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
398     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
399     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
400     if (n_constraints) {
401       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
402       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
403       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
404     }
405     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
406     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
407 
408     /* Set values in coarse basis function and subdomain part of coarse_mat */
409     /* coarse basis functions */
410     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
411     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
412     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
413     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
415     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
416     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
417     if (pcbddc->switch_static || pcbddc->dbg_flag) {
418       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
419       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
420       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
421       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
423     }
424     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
425     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
427     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
428     if (n_constraints) {
429       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
430       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
431       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
432     }
433 
434     /* check */
435     if (pcbddc->dbg_flag) {
436       /* assemble subdomain vector on local nodes */
437       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
438       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
439       if (n_R) {
440         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
441       }
442       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
443       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
444       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
445       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
446       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
447       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
448       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
449       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
450       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
451       if (n_constraints) {
452         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
453         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
454         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
455       }
456       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
457       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
458       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
459       /* check saddle point solution */
460       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
461       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
462       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
463       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
464       /* shift by the identity matrix */
465       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
466       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
467       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
468       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
469     }
470   }
471 
472   /* constraints */
473   for (i=0;i<n_constraints;i++) {
474     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
475     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
476     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
477     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
478     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
479     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
480     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
482     if (n_vertices) {
483       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
484     }
485     /* Set values in coarse basis function and subdomain part of coarse_mat */
486     /* coarse basis functions */
487     j = i+n_vertices; /* don't touch this! */
488     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
489     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
490     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
492     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
493     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
494     if (pcbddc->switch_static || pcbddc->dbg_flag) {
495       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
498       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
499       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
500     }
501     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
502     if (n_vertices) {
503       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
504       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
505       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
506     }
507     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
508     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
509     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
510 
511     if (pcbddc->dbg_flag) {
512       /* assemble subdomain vector on nodes */
513       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
514       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
515       if (n_R) {
516         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
517       }
518       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
519       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
520       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
521       /* assemble subdomain vector of lagrange multipliers */
522       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
523       if (n_vertices) {
524         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
525         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
526         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
527       }
528       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
529       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
530       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
531       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
532       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
533       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
534       /* check saddle point solution */
535       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
536       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
537       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
538       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
539       /* shift by the identity matrix */
540       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
541       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
542       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
543       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
544     }
545   }
546   /* call assembling routines for local coarse basis */
547   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549   if (pcbddc->switch_static || pcbddc->dbg_flag) {
550     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552   }
553 
554   /* compute other basis functions for non-symmetric problems */
555   if (!pcbddc->issym) {
556     if (!pcbddc->coarse_psi_B) {
557       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
558       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
559       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
560       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
561     }
562     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
563       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
564       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
565       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
566       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
567     }
568     for (i=0;i<pcbddc->local_primal_size;i++) {
569       if (n_constraints) {
570         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
571         for (j=0;j<n_constraints;j++) {
572           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
573         }
574         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
575         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
576       }
577       if (i<n_vertices) {
578         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
579         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
580         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
581         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
582         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
583         if (n_constraints) {
584           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
585         }
586       } else {
587         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
588       }
589       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
590       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
591       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
592       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
593       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
594       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
595       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
596       if (i<n_vertices) {
597         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
598       }
599       if (pcbddc->switch_static || pcbddc->dbg_flag) {
600         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
601         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
602         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
603         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
604         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
605       }
606 
607       if (pcbddc->dbg_flag) {
608         /* assemble subdomain vector on nodes */
609         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
610         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
611         if (n_R) {
612           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
613         }
614         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
615         if (i<n_vertices) {
616           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
617         }
618         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
619         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
620         /* assemble subdomain vector of lagrange multipliers */
621         for (j=0;j<pcbddc->local_primal_size;j++) {
622           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
623         }
624         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
625         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
626         /* check saddle point solution */
627         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
628         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
629         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
630         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
631         /* shift by the identity matrix */
632         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
633         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
634         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
635         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
636       }
637     }
638     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
639     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
640     if (pcbddc->switch_static || pcbddc->dbg_flag) {
641       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
642       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
643     }
644     unsymmetric_check = PETSC_TRUE;
645   } else { /* take references to already computed coarse basis */
646     unsymmetric_check = PETSC_FALSE;
647     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
648     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
649     if (pcbddc->coarse_phi_D) {
650       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
651       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
652     }
653   }
654   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
655   /* Checking coarse_sub_mat and coarse basis functios */
656   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
657   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
658   if (pcbddc->dbg_flag) {
659     Mat         coarse_sub_mat;
660     Mat         AUXMAT,TM1,TM2,TM3,TM4;
661     Mat         coarse_phi_D,coarse_phi_B;
662     Mat         coarse_psi_D,coarse_psi_B;
663     Mat         A_II,A_BB,A_IB,A_BI;
664     MatType     checkmattype=MATSEQAIJ;
665     PetscReal   real_value;
666 
667     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
668     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
669     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
670     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
671     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
672     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
673     if (unsymmetric_check) {
674       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
675       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
676     }
677     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
678 
679     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
680     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
681     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
682     if (unsymmetric_check) {
683       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
684       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
685       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
686       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
687       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
688       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
689       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
690       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
691       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
692       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
693       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
694       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
695     } else {
696       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
697       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
698       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
699       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
700       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
701       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
702       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
703       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
704     }
705     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
706     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
707     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
708     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
709     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
710     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
711     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
712     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
713     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
714     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
715     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
716     for (i=0;i<pcbddc->local_primal_size;i++) {
717       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
718     }
719     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
720     for (i=0;i<pcbddc->local_primal_size;i++) {
721       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
722     }
723     if (unsymmetric_check) {
724       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
725       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
726         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
727       }
728       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
729       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
730         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
731       }
732     }
733     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
734     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
735     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
737     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
738     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
739     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
740     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
741     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
742     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
743     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
744     if (unsymmetric_check) {
745       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
746       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
747     }
748     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
749     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
750     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
751     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
752   }
753   /* free memory */
754   if (n_vertices) {
755     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
756     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
757     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
758     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
759     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
760   }
761   if (n_constraints) {
762     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
763     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
764     ierr = MatDestroy(&M1);CHKERRQ(ierr);
765     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
766   }
767   ierr = PetscFree(auxindices);CHKERRQ(ierr);
768   /* get back data */
769   *coarse_submat_vals_n = coarse_submat_vals;
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatGetSubMatrixUnsorted"
775 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
776 {
777   Mat            *work_mat;
778   IS             isrow_s,iscol_s;
779   PetscBool      rsorted,csorted;
780   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
781   PetscErrorCode ierr;
782 
783   PetscFunctionBegin;
784   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
785   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
786   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
787   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
788 
789   if (!rsorted) {
790     const PetscInt *idxs;
791     PetscInt *idxs_sorted,i;
792 
793     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
794     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
795     for (i=0;i<rsize;i++) {
796       idxs_perm_r[i] = i;
797     }
798     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
799     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
800     for (i=0;i<rsize;i++) {
801       idxs_sorted[i] = idxs[idxs_perm_r[i]];
802     }
803     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
804     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
805   } else {
806     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
807     isrow_s = isrow;
808   }
809 
810   if (!csorted) {
811     if (isrow == iscol) {
812       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
813       iscol_s = isrow_s;
814     } else {
815       const PetscInt *idxs;
816       PetscInt *idxs_sorted,i;
817 
818       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
819       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
820       for (i=0;i<csize;i++) {
821         idxs_perm_c[i] = i;
822       }
823       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
824       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
825       for (i=0;i<csize;i++) {
826         idxs_sorted[i] = idxs[idxs_perm_c[i]];
827       }
828       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
829       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
830     }
831   } else {
832     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
833     iscol_s = iscol;
834   }
835 
836   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
837 
838   if (!rsorted || !csorted) {
839     Mat      new_mat;
840     IS       is_perm_r,is_perm_c;
841 
842     if (!rsorted) {
843       PetscInt *idxs_r,i;
844       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
845       for (i=0;i<rsize;i++) {
846         idxs_r[idxs_perm_r[i]] = i;
847       }
848       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
849       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
850     } else {
851       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
852     }
853     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
854 
855     if (!csorted) {
856       if (isrow_s == iscol_s) {
857         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
858         is_perm_c = is_perm_r;
859       } else {
860         PetscInt *idxs_c,i;
861         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
862         for (i=0;i<csize;i++) {
863           idxs_c[idxs_perm_c[i]] = i;
864         }
865         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
866         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
867       }
868     } else {
869       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
870     }
871     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
872 
873     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
874     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
875     work_mat[0] = new_mat;
876     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
877     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
878   }
879 
880   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
881   *B = work_mat[0];
882   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
883   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
884   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
890 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
891 {
892   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
893   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
894   Mat            new_mat;
895   IS             is_local,is_global;
896   PetscInt       local_size;
897   PetscBool      isseqaij;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
902   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
903   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
904   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
905   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
906   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
907   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
908 
909   /* check */
910   if (pcbddc->dbg_flag) {
911     Vec       x,x_change;
912     PetscReal error;
913 
914     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
915     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
916     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
917     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
920     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
921     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
922     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
923     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
924     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
925     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
926     ierr = VecDestroy(&x);CHKERRQ(ierr);
927     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
928   }
929 
930   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
931   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
932   if (isseqaij) {
933     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
934   } else {
935     Mat work_mat;
936     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
937     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
938     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
939   }
940   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
941   /*
942   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
943   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
944   */
945   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
951 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
952 {
953   PC_IS*         pcis = (PC_IS*)(pc->data);
954   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
955   IS             is_aux1,is_aux2;
956   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
957   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
958   PetscInt       vbs,bs;
959   PetscBT        bitmask;
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   /*
964     No need to setup local scatters if
965       - primal space is unchanged
966         AND
967       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
968         AND
969       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
970   */
971   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
972     PetscFunctionReturn(0);
973   }
974   /* destroy old objects */
975   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
976   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
977   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
978   /* Set Non-overlapping dimensions */
979   n_B = pcis->n_B; n_D = pcis->n - n_B;
980   n_vertices = pcbddc->n_actual_vertices;
981   /* create auxiliary bitmask */
982   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
983   for (i=0;i<n_vertices;i++) {
984     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
985   }
986 
987   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
988   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
989   for (i=0, n_R=0; i<pcis->n; i++) {
990     if (!PetscBTLookup(bitmask,i)) {
991       idx_R_local[n_R] = i;
992       n_R++;
993     }
994   }
995 
996   /* Block code */
997   vbs = 1;
998   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
999   if (bs>1 && !(n_vertices%bs)) {
1000     PetscBool is_blocked = PETSC_TRUE;
1001     PetscInt  *vary;
1002     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1003     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1004     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1005     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1006     for (i=0; i<n_vertices; i++) {
1007       if (vary[i]!=0 && vary[i]!=bs) {
1008         is_blocked = PETSC_FALSE;
1009         break;
1010       }
1011     }
1012     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1013       vbs = bs;
1014       for (i=0;i<n_R/vbs;i++) {
1015         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1016       }
1017     }
1018     ierr = PetscFree(vary);CHKERRQ(ierr);
1019   }
1020   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1021   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1022 
1023   /* print some info if requested */
1024   if (pcbddc->dbg_flag) {
1025     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1026     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1027     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1028     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1029     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1030     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1031     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1032     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1033   }
1034 
1035   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1036   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1037   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1038   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1039   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1040   for (i=0; i<n_D; i++) {
1041     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1042   }
1043   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1044   for (i=0, j=0; i<n_R; i++) {
1045     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1046       aux_array1[j++] = i;
1047     }
1048   }
1049   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1050   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1051   for (i=0, j=0; i<n_B; i++) {
1052     if (!PetscBTLookup(bitmask,is_indices[i])) {
1053       aux_array2[j++] = i;
1054     }
1055   }
1056   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1057   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1058   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1059   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1060   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1061 
1062   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1063     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1064     for (i=0, j=0; i<n_R; i++) {
1065       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1066         aux_array1[j++] = i;
1067       }
1068     }
1069     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1070     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1071     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1072   }
1073   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1074   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1081 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1082 {
1083   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1084   PC_IS          *pcis = (PC_IS*)pc->data;
1085   PC             pc_temp;
1086   Mat            A_RR;
1087   MatReuse       reuse;
1088   PetscScalar    m_one = -1.0;
1089   PetscReal      value;
1090   PetscInt       n_D,n_R,ibs,mbs;
1091   PetscBool      use_exact,use_exact_reduced,issbaij;
1092   PetscErrorCode ierr;
1093   /* prefixes stuff */
1094   char           dir_prefix[256],neu_prefix[256],str_level[16];
1095   size_t         len;
1096 
1097   PetscFunctionBegin;
1098 
1099   /* compute prefixes */
1100   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1101   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1102   if (!pcbddc->current_level) {
1103     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1104     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1105     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1106     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1107   } else {
1108     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1109     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1110     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1111     len -= 15; /* remove "pc_bddc_coarse_" */
1112     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1113     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1114     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1115     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1116     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1117     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1118     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1119     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1120   }
1121 
1122   /* DIRICHLET PROBLEM */
1123   if (dirichlet) {
1124     /* Matrix for Dirichlet problem is pcis->A_II */
1125     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1126     if (!pcbddc->ksp_D) { /* create object if not yet build */
1127       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1128       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1129       /* default */
1130       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1131       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1132       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1133       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1134       if (issbaij) {
1135         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1136       } else {
1137         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1138       }
1139       /* Allow user's customization */
1140       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1141       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1142     }
1143     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1144     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1145     if (!n_D) {
1146       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1147       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1148     }
1149     /* Set Up KSP for Dirichlet problem of BDDC */
1150     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1151     /* set ksp_D into pcis data */
1152     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1153     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1154     pcis->ksp_D = pcbddc->ksp_D;
1155   }
1156 
1157   /* NEUMANN PROBLEM */
1158   A_RR = 0;
1159   if (neumann) {
1160     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1161     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1162     if (pcbddc->ksp_R) { /* already created ksp */
1163       PetscInt nn_R;
1164       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1165       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1166       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1167       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1168         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1169         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1170         reuse = MAT_INITIAL_MATRIX;
1171       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1172         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1173           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1174           reuse = MAT_INITIAL_MATRIX;
1175         } else { /* safe to reuse the matrix */
1176           reuse = MAT_REUSE_MATRIX;
1177         }
1178       }
1179       /* last check */
1180       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1181         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1182         reuse = MAT_INITIAL_MATRIX;
1183       }
1184     } else { /* first time, so we need to create the matrix */
1185       reuse = MAT_INITIAL_MATRIX;
1186     }
1187     /* extract A_RR */
1188     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1189     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1190     if (ibs != mbs) {
1191       Mat newmat;
1192       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1193       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1194       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1195     } else {
1196       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1197     }
1198     if (!pcbddc->ksp_R) { /* create object if not present */
1199       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1200       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1201       /* default */
1202       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1203       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1204       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1205       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1206       if (issbaij) {
1207         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1208       } else {
1209         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1210       }
1211       /* Allow user's customization */
1212       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1213       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1214     }
1215     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1216     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1217     if (!n_R) {
1218       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1219       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1220     }
1221     /* Set Up KSP for Neumann problem of BDDC */
1222     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1223   }
1224 
1225   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1226   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1227     if (pcbddc->dbg_flag) {
1228       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1229       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1230       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1231     }
1232     if (dirichlet) { /* Dirichlet */
1233       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1234       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1235       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1236       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1237       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1238       /* need to be adapted? */
1239       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1240       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1241       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1242       /* print info */
1243       if (pcbddc->dbg_flag) {
1244         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1245         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1246       }
1247       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1248         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1249       }
1250     }
1251     if (neumann) { /* Neumann */
1252       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1253       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1254       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1255       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1256       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1257       /* need to be adapted? */
1258       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1259       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1260       /* print info */
1261       if (pcbddc->dbg_flag) {
1262         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1263         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1264       }
1265       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1266         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1267       }
1268     }
1269   }
1270   /* free Neumann problem's matrix */
1271   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1277 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1278 {
1279   PetscErrorCode ierr;
1280   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1281 
1282   PetscFunctionBegin;
1283   if (applytranspose) {
1284     if (pcbddc->local_auxmat1) {
1285       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1286       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1287     }
1288     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1289   } else {
1290     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1291     if (pcbddc->local_auxmat1) {
1292       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1293       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1294     }
1295   }
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1300 #undef __FUNCT__
1301 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1302 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1303 {
1304   PetscErrorCode ierr;
1305   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1306   PC_IS*            pcis = (PC_IS*)  (pc->data);
1307   const PetscScalar zero = 0.0;
1308 
1309   PetscFunctionBegin;
1310   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1311   if (applytranspose) {
1312     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1313     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1314   } else {
1315     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1316     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1317   }
1318   /* start communications from local primal nodes to rhs of coarse solver */
1319   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1320   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1322 
1323   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1324   /* TODO remove null space when doing multilevel */
1325   if (pcbddc->coarse_ksp) {
1326     if (applytranspose) {
1327       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1328     } else {
1329       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1330     }
1331   }
1332 
1333   /* Local solution on R nodes */
1334   if (pcis->n) {
1335     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1336     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1337     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1338     if (pcbddc->switch_static) {
1339       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1340       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1341     }
1342     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1343     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1344     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1345     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1346     if (pcbddc->switch_static) {
1347       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1348       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1349     }
1350   }
1351 
1352   /* communications from coarse sol to local primal nodes */
1353   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355 
1356   /* Sum contributions from two levels */
1357   if (applytranspose) {
1358     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1359     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1360   } else {
1361     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1362     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1368 #undef __FUNCT__
1369 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1370 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1371 {
1372   PetscErrorCode ierr;
1373   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1374   PetscScalar    *array,*array2;
1375   Vec            from,to;
1376 
1377   PetscFunctionBegin;
1378   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1379     from = pcbddc->coarse_vec;
1380     to = pcbddc->vec1_P;
1381     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1382       Vec tvec;
1383       PetscInt lsize;
1384       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1385       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1386       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1387       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1388       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1389       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1390       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1391     }
1392   } else { /* from local to global -> put data in coarse right hand side */
1393     from = pcbddc->vec1_P;
1394     to = pcbddc->coarse_vec;
1395   }
1396   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1402 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1403 {
1404   PetscErrorCode ierr;
1405   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1406   PetscScalar    *array,*array2;
1407   Vec            from,to;
1408 
1409   PetscFunctionBegin;
1410   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1411     from = pcbddc->coarse_vec;
1412     to = pcbddc->vec1_P;
1413   } else { /* from local to global -> put data in coarse right hand side */
1414     from = pcbddc->vec1_P;
1415     to = pcbddc->coarse_vec;
1416   }
1417   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1418   if (smode == SCATTER_FORWARD) {
1419     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1420       Vec tvec;
1421       PetscInt lsize;
1422       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1423       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1424       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1425       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1426       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1427       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1428       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1429     }
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 /* uncomment for testing purposes */
1435 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1436 #undef __FUNCT__
1437 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1438 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1439 {
1440   PetscErrorCode    ierr;
1441   PC_IS*            pcis = (PC_IS*)(pc->data);
1442   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1443   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1444   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1445   MatType           impMatType=MATSEQAIJ;
1446   /* one and zero */
1447   PetscScalar       one=1.0,zero=0.0;
1448   /* space to store constraints and their local indices */
1449   PetscScalar       *temp_quadrature_constraint;
1450   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1451   /* iterators */
1452   PetscInt          i,j,k,total_counts,temp_start_ptr;
1453   /* stuff to store connected components stored in pcbddc->mat_graph */
1454   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1455   PetscInt          n_ISForFaces,n_ISForEdges;
1456   /* near null space stuff */
1457   MatNullSpace      nearnullsp;
1458   const Vec         *nearnullvecs;
1459   Vec               *localnearnullsp;
1460   PetscBool         nnsp_has_cnst;
1461   PetscInt          nnsp_size;
1462   PetscScalar       *array;
1463   /* BLAS integers */
1464   PetscBLASInt      lwork,lierr;
1465   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1466   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1467   /* LAPACK working arrays for SVD or POD */
1468   PetscBool         skip_lapack;
1469   PetscScalar       *work;
1470   PetscReal         *singular_vals;
1471 #if defined(PETSC_USE_COMPLEX)
1472   PetscReal         *rwork;
1473 #endif
1474 #if defined(PETSC_MISSING_LAPACK_GESVD)
1475   PetscBLASInt      Blas_one_2=1;
1476   PetscScalar       *temp_basis,*correlation_mat;
1477 #else
1478   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1479   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1480 #endif
1481   /* reuse */
1482   PetscInt          olocal_primal_size;
1483   PetscInt          *oprimal_indices_local_idxs;
1484   /* change of basis */
1485   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1486   PetscBool         boolforchange,qr_needed;
1487   PetscBT           touched,change_basis,qr_needed_idx;
1488   /* auxiliary stuff */
1489   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1490   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1491   PetscScalar       *temp_quadrature_constraint_work;
1492   /* some quantities */
1493   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1494   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1495 
1496 
1497   PetscFunctionBegin;
1498   /* Destroy Mat objects computed previously */
1499   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1500   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1501   /* Get index sets for faces, edges and vertices from graph */
1502   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1503   /* free unneeded index sets */
1504   if (!pcbddc->use_vertices) {
1505     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1506   }
1507   if (!pcbddc->use_edges) {
1508     for (i=0;i<n_ISForEdges;i++) {
1509       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1510     }
1511     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1512     n_ISForEdges = 0;
1513   }
1514   if (!pcbddc->use_faces) {
1515     for (i=0;i<n_ISForFaces;i++) {
1516       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1517     }
1518     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1519     n_ISForFaces = 0;
1520   }
1521   /* HACKS (the following two blocks of code) */
1522   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1523     pcbddc->use_change_of_basis = PETSC_TRUE;
1524     if (!ISForEdges) {
1525       pcbddc->use_change_on_faces = PETSC_TRUE;
1526     }
1527   }
1528   if (pcbddc->NullSpace) {
1529     /* use_change_of_basis should be consistent among processors */
1530     PetscBool tbool[2],gbool[2];
1531     tbool [0] = pcbddc->use_change_of_basis;
1532     tbool [1] = pcbddc->use_change_on_faces;
1533     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1534     pcbddc->use_change_of_basis = gbool[0];
1535     pcbddc->use_change_on_faces = gbool[1];
1536   }
1537   /* print some info */
1538   if (pcbddc->dbg_flag) {
1539     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1540     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1541     i = 0;
1542     if (ISForVertices) {
1543       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1544     }
1545     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1546     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1547     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1548     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1549   }
1550   /* check if near null space is attached to global mat */
1551   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1552   if (nearnullsp) {
1553     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1554     /* remove any stored info */
1555     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1556     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1557     /* store information for BDDC solver reuse */
1558     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1559     pcbddc->onearnullspace = nearnullsp;
1560     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1561     for (i=0;i<nnsp_size;i++) {
1562       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1563     }
1564   } else { /* if near null space is not provided BDDC uses constants by default */
1565     nnsp_size = 0;
1566     nnsp_has_cnst = PETSC_TRUE;
1567   }
1568   /* get max number of constraints on a single cc */
1569   max_constraints = nnsp_size;
1570   if (nnsp_has_cnst) max_constraints++;
1571 
1572   /*
1573        Evaluate maximum storage size needed by the procedure
1574        - temp_indices will contain start index of each constraint stored as follows
1575        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1576        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1577        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1578                                                                                                                                                          */
1579   total_counts = n_ISForFaces+n_ISForEdges;
1580   total_counts *= max_constraints;
1581   n_vertices = 0;
1582   if (ISForVertices) {
1583     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1584   }
1585   total_counts += n_vertices;
1586   ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1587   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1588   total_counts = 0;
1589   max_size_of_constraint = 0;
1590   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1591     if (i<n_ISForEdges) {
1592       used_IS = &ISForEdges[i];
1593     } else {
1594       used_IS = &ISForFaces[i-n_ISForEdges];
1595     }
1596     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1597     total_counts += j;
1598     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1599   }
1600   total_counts *= max_constraints;
1601   total_counts += n_vertices;
1602   ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1603   /* get local part of global near null space vectors */
1604   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1605   for (k=0;k<nnsp_size;k++) {
1606     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1607     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609   }
1610 
1611   /* whether or not to skip lapack calls */
1612   skip_lapack = PETSC_TRUE;
1613   if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1614 
1615   /* allocate some auxiliary stuff */
1616   if (!skip_lapack || pcbddc->use_qr_single) {
1617     ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1618   }
1619 
1620   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1621   if (!skip_lapack) {
1622     PetscScalar temp_work;
1623 
1624 #if defined(PETSC_MISSING_LAPACK_GESVD)
1625     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1626     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1627     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1628     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1629 #if defined(PETSC_USE_COMPLEX)
1630     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1631 #endif
1632     /* now we evaluate the optimal workspace using query with lwork=-1 */
1633     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1634     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1635     lwork = -1;
1636     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1637 #if !defined(PETSC_USE_COMPLEX)
1638     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1639 #else
1640     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1641 #endif
1642     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1643     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1644 #else /* on missing GESVD */
1645     /* SVD */
1646     PetscInt max_n,min_n;
1647     max_n = max_size_of_constraint;
1648     min_n = max_constraints;
1649     if (max_size_of_constraint < max_constraints) {
1650       min_n = max_size_of_constraint;
1651       max_n = max_constraints;
1652     }
1653     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1654 #if defined(PETSC_USE_COMPLEX)
1655     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1656 #endif
1657     /* now we evaluate the optimal workspace using query with lwork=-1 */
1658     lwork = -1;
1659     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1660     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1661     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1662     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1663 #if !defined(PETSC_USE_COMPLEX)
1664     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1665 #else
1666     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1667 #endif
1668     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1669     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1670 #endif /* on missing GESVD */
1671     /* Allocate optimal workspace */
1672     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1673     ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1674   }
1675   /* Now we can loop on constraining sets */
1676   total_counts = 0;
1677   temp_indices[0] = 0;
1678   /* vertices */
1679   if (ISForVertices) {
1680     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1681     if (nnsp_has_cnst) { /* consider all vertices */
1682       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1683       for (i=0;i<n_vertices;i++) {
1684         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1685         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1686         total_counts++;
1687       }
1688     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1689       PetscBool used_vertex;
1690       for (i=0;i<n_vertices;i++) {
1691         used_vertex = PETSC_FALSE;
1692         k = 0;
1693         while (!used_vertex && k<nnsp_size) {
1694           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1695           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1696             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1697             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1698             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1699             total_counts++;
1700             used_vertex = PETSC_TRUE;
1701           }
1702           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1703           k++;
1704         }
1705       }
1706     }
1707     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1708     n_vertices = total_counts;
1709   }
1710 
1711   /* edges and faces */
1712   for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1713     if (ncc<n_ISForEdges) {
1714       used_IS = &ISForEdges[ncc];
1715       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1716     } else {
1717       used_IS = &ISForFaces[ncc-n_ISForEdges];
1718       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1719     }
1720     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1721     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1722     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1723     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1724     /* change of basis should not be performed on local periodic nodes */
1725     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1726     if (nnsp_has_cnst) {
1727       PetscScalar quad_value;
1728       temp_constraints++;
1729       if (!pcbddc->use_nnsp_true) {
1730         quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1731       } else {
1732         quad_value = 1.0;
1733       }
1734       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1735       for (j=0;j<size_of_constraint;j++) {
1736         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1737       }
1738       /* sort by global ordering if using lapack subroutines */
1739       if (!skip_lapack || pcbddc->use_qr_single) {
1740         ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1741         for (j=0;j<size_of_constraint;j++) {
1742           permutation[j]=j;
1743         }
1744         ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1745         for (j=0;j<size_of_constraint;j++) {
1746           temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1747           temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1748         }
1749         ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1750         ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1751       }
1752       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1753       total_counts++;
1754     }
1755     for (k=0;k<nnsp_size;k++) {
1756       PetscReal real_value;
1757       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1758       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1759       for (j=0;j<size_of_constraint;j++) {
1760         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1761       }
1762       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1763       /* check if array is null on the connected component */
1764       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1765       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1766       if (real_value > 0.0) { /* keep indices and values */
1767         /* sort by global ordering if using lapack subroutines */
1768         if (!skip_lapack || pcbddc->use_qr_single) {
1769           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1770           for (j=0;j<size_of_constraint;j++) {
1771             permutation[j]=j;
1772           }
1773           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1774           for (j=0;j<size_of_constraint;j++) {
1775             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1776             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1777           }
1778           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1779           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1780         }
1781         temp_constraints++;
1782         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1783         total_counts++;
1784       }
1785     }
1786     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1787     valid_constraints = temp_constraints;
1788     if (!pcbddc->use_nnsp_true && temp_constraints) {
1789       if (temp_constraints == 1) { /* just normalize the constraint */
1790         PetscScalar norm;
1791         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1792         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1793         norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
1794         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
1795       } else { /* perform SVD */
1796         PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1797 
1798 #if defined(PETSC_MISSING_LAPACK_GESVD)
1799         /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1800            POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1801            -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1802               the constraints basis will differ (by a complex factor with absolute value equal to 1)
1803               from that computed using LAPACKgesvd
1804            -> This is due to a different computation of eigenvectors in LAPACKheev
1805            -> The quality of the POD-computed basis will be the same */
1806         ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1807         /* Store upper triangular part of correlation matrix */
1808         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1809         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1810         for (j=0;j<temp_constraints;j++) {
1811           for (k=0;k<j+1;k++) {
1812             PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1813           }
1814         }
1815         /* compute eigenvalues and eigenvectors of correlation matrix */
1816         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1817         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1818 #if !defined(PETSC_USE_COMPLEX)
1819         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1820 #else
1821         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1822 #endif
1823         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1824         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1825         /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1826         j = 0;
1827         while (j < temp_constraints && singular_vals[j] < tol) j++;
1828         total_counts = total_counts-j;
1829         valid_constraints = temp_constraints-j;
1830         /* scale and copy POD basis into used quadrature memory */
1831         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1832         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1833         ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1834         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1835         ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1836         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1837         if (j<temp_constraints) {
1838           PetscInt ii;
1839           for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1840           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1841           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1842           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1843           for (k=0;k<temp_constraints-j;k++) {
1844             for (ii=0;ii<size_of_constraint;ii++) {
1845               temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1846             }
1847           }
1848         }
1849 #else  /* on missing GESVD */
1850         ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1851         ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1852         ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1853         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1854 #if !defined(PETSC_USE_COMPLEX)
1855         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1856 #else
1857         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1858 #endif
1859         if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1860         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1861         /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1862         k = temp_constraints;
1863         if (k > size_of_constraint) k = size_of_constraint;
1864         j = 0;
1865         while (j < k && singular_vals[k-j-1] < tol) j++;
1866         valid_constraints = k-j;
1867         total_counts = total_counts-temp_constraints+valid_constraints;
1868 #endif /* on missing GESVD */
1869       }
1870     }
1871     /* setting change_of_basis flag is safe now */
1872     if (boolforchange) {
1873       for (j=0;j<valid_constraints;j++) {
1874         PetscBTSet(change_basis,total_counts-j-1);
1875       }
1876     }
1877   }
1878   /* free index sets of faces, edges and vertices */
1879   for (i=0;i<n_ISForFaces;i++) {
1880     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1881   }
1882   if (n_ISForFaces) {
1883     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1884   }
1885   for (i=0;i<n_ISForEdges;i++) {
1886     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1887   }
1888   if (n_ISForEdges) {
1889     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1890   }
1891   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1892   /* map temp_indices_to_constraint in boundary numbering */
1893   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1894   if (i != temp_indices[total_counts]) {
1895     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1896   }
1897 
1898   /* free workspace */
1899   if (!skip_lapack || pcbddc->use_qr_single) {
1900     ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
1901   }
1902   if (!skip_lapack) {
1903     ierr = PetscFree(work);CHKERRQ(ierr);
1904 #if defined(PETSC_USE_COMPLEX)
1905     ierr = PetscFree(rwork);CHKERRQ(ierr);
1906 #endif
1907     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1908 #if defined(PETSC_MISSING_LAPACK_GESVD)
1909     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1910     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1911 #endif
1912   }
1913   for (k=0;k<nnsp_size;k++) {
1914     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1915   }
1916   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1917 
1918   /* set quantities in pcbddc data structure and store previous primal size */
1919   /* n_vertices defines the number of subdomain corners in the primal space */
1920   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1921   olocal_primal_size = pcbddc->local_primal_size;
1922   pcbddc->local_primal_size = total_counts;
1923   pcbddc->n_vertices = n_vertices;
1924   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1925 
1926   /* Create constraint matrix */
1927   /* The constraint matrix is used to compute the l2g map of primal dofs */
1928   /* so we need to set it up properly either with or without change of basis */
1929   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1930   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1931   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1932   /* array to compute a local numbering of constraints : vertices first then constraints */
1933   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1934   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1935   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1936   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1937   /* auxiliary stuff for basis change */
1938   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1939   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1940 
1941   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1942   total_primal_vertices=0;
1943   for (i=0;i<pcbddc->local_primal_size;i++) {
1944     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1945     if (size_of_constraint == 1) {
1946       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1947       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1948       aux_primal_minloc[total_primal_vertices]=0;
1949       total_primal_vertices++;
1950     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1951       PetscInt min_loc,min_index;
1952       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1953       /* find first untouched local node */
1954       k = 0;
1955       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1956       min_index = global_indices[k];
1957       min_loc = k;
1958       /* search the minimum among global nodes already untouched on the cc */
1959       for (k=1;k<size_of_constraint;k++) {
1960         /* there can be more than one constraint on a single connected component */
1961         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1962           min_index = global_indices[k];
1963           min_loc = k;
1964         }
1965       }
1966       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1967       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1968       aux_primal_minloc[total_primal_vertices]=min_loc;
1969       total_primal_vertices++;
1970     }
1971   }
1972   /* determine if a QR strategy is needed for change of basis */
1973   qr_needed = PETSC_FALSE;
1974   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1975   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1976     if (PetscBTLookup(change_basis,i)) {
1977       if (!pcbddc->use_qr_single) {
1978         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1979         j = 0;
1980         for (k=0;k<size_of_constraint;k++) {
1981           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1982             j++;
1983           }
1984         }
1985         /* found more than one primal dof on the cc */
1986         if (j > 1) {
1987           PetscBTSet(qr_needed_idx,i);
1988           qr_needed = PETSC_TRUE;
1989         }
1990       } else {
1991         PetscBTSet(qr_needed_idx,i);
1992         qr_needed = PETSC_TRUE;
1993       }
1994     }
1995   }
1996   /* free workspace */
1997   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1998 
1999   /* permute indices in order to have a sorted set of vertices */
2000   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2001 
2002   /* nonzero structure of constraint matrix */
2003   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2004   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2005   j=total_primal_vertices;
2006   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2007     if (!PetscBTLookup(change_basis,i)) {
2008       nnz[j]=temp_indices[i+1]-temp_indices[i];
2009       j++;
2010     }
2011   }
2012   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2013   ierr = PetscFree(nnz);CHKERRQ(ierr);
2014   /* set values in constraint matrix */
2015   for (i=0;i<total_primal_vertices;i++) {
2016     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2017   }
2018   total_counts = total_primal_vertices;
2019   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2020     if (!PetscBTLookup(change_basis,i)) {
2021       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2022       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2023       total_counts++;
2024     }
2025   }
2026   /* assembling */
2027   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2028   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029   /*
2030   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2031   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2032   */
2033   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2034   if (pcbddc->use_change_of_basis) {
2035     /* dual and primal dofs on a single cc */
2036     PetscInt     dual_dofs,primal_dofs;
2037     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2038     PetscInt     primal_counter;
2039     /* working stuff for GEQRF */
2040     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2041     PetscBLASInt lqr_work;
2042     /* working stuff for UNGQR */
2043     PetscScalar  *gqr_work,lgqr_work_t;
2044     PetscBLASInt lgqr_work;
2045     /* working stuff for TRTRS */
2046     PetscScalar  *trs_rhs;
2047     PetscBLASInt Blas_NRHS;
2048     /* pointers for values insertion into change of basis matrix */
2049     PetscInt     *start_rows,*start_cols;
2050     PetscScalar  *start_vals;
2051     /* working stuff for values insertion */
2052     PetscBT      is_primal;
2053     /* matrix sizes */
2054     PetscInt     global_size,local_size;
2055     /* work array for nonzeros */
2056     PetscScalar  *nnz_array;
2057     /* temporary change of basis */
2058     Mat          localChangeOfBasisMatrix;
2059     /* auxiliary work for global change of basis */
2060     Vec          nnz_vec;
2061     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
2062     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
2063     PetscScalar  *vals;
2064     PetscBool    done;
2065 
2066     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2067     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2068     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2069     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2070 
2071     /* nonzeros for local mat */
2072     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2073     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2074     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2075       if (PetscBTLookup(change_basis,i)) {
2076         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2077         if (PetscBTLookup(qr_needed_idx,i)) {
2078           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2079         } else {
2080           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2081           /* get local primal index on the cc */
2082           j = 0;
2083           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2084           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2085         }
2086       }
2087     }
2088     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2089     /* Set initial identity in the matrix */
2090     for (i=0;i<pcis->n_B;i++) {
2091       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2092     }
2093 
2094     if (pcbddc->dbg_flag) {
2095       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2096       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2097     }
2098 
2099 
2100     /* Now we loop on the constraints which need a change of basis */
2101     /*
2102        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2103        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2104 
2105        Basic blocks of change of basis matrix T computed by
2106 
2107           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2108 
2109             | 1        0   ...        0         s_1/S |
2110             | 0        1   ...        0         s_2/S |
2111             |              ...                        |
2112             | 0        ...            1     s_{n-1}/S |
2113             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2114 
2115             with S = \sum_{i=1}^n s_i^2
2116             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2117                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2118 
2119           - QR decomposition of constraints otherwise
2120     */
2121     if (qr_needed) {
2122       /* space to store Q */
2123       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2124       /* first we issue queries for optimal work */
2125       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2126       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2127       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2128       lqr_work = -1;
2129       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2130       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2131       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2132       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2133       lgqr_work = -1;
2134       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2135       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2136       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2137       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2138       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2139       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2140       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2141       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2142       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2143       /* array to store scaling factors for reflectors */
2144       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2145       /* array to store rhs and solution of triangular solver */
2146       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2147       /* allocating workspace for check */
2148       if (pcbddc->dbg_flag) {
2149         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2150       }
2151     }
2152     /* array to store whether a node is primal or not */
2153     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2154     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2155     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2156     if (i != total_primal_vertices) {
2157       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2158     }
2159     for (i=0;i<total_primal_vertices;i++) {
2160       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2161     }
2162     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2163 
2164     /* loop on constraints and see whether or not they need a change of basis and compute it */
2165     /* -> using implicit ordering contained in temp_indices data */
2166     total_counts = pcbddc->n_vertices;
2167     primal_counter = total_counts;
2168     while (total_counts<pcbddc->local_primal_size) {
2169       primal_dofs = 1;
2170       if (PetscBTLookup(change_basis,total_counts)) {
2171         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2172         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2173           primal_dofs++;
2174         }
2175         /* get constraint info */
2176         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2177         dual_dofs = size_of_constraint-primal_dofs;
2178 
2179         if (pcbddc->dbg_flag) {
2180           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2181         }
2182 
2183         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2184 
2185           /* copy quadrature constraints for change of basis check */
2186           if (pcbddc->dbg_flag) {
2187             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2188           }
2189           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2190           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2191 
2192           /* compute QR decomposition of constraints */
2193           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2194           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2195           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2196           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2197           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2198           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2199           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2200 
2201           /* explictly compute R^-T */
2202           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2203           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2204           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2205           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2206           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2207           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2208           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2209           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2210           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2211           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2212 
2213           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2214           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2215           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2216           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2217           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2218           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2219           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2220           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2221           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2222 
2223           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2224              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2225              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2226           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2227           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2228           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2229           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2230           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2231           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2232           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2233           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2234           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2235           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2236 
2237           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2238           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2239           /* insert cols for primal dofs */
2240           for (j=0;j<primal_dofs;j++) {
2241             start_vals = &qr_basis[j*size_of_constraint];
2242             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2243             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2244           }
2245           /* insert cols for dual dofs */
2246           for (j=0,k=0;j<dual_dofs;k++) {
2247             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2248               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2249               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2250               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2251               j++;
2252             }
2253           }
2254 
2255           /* check change of basis */
2256           if (pcbddc->dbg_flag) {
2257             PetscInt   ii,jj;
2258             PetscBool valid_qr=PETSC_TRUE;
2259             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2260             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2261             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2262             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2263             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2264             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2265             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2266             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2267             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2268             for (jj=0;jj<size_of_constraint;jj++) {
2269               for (ii=0;ii<primal_dofs;ii++) {
2270                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2271                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2272               }
2273             }
2274             if (!valid_qr) {
2275               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2276               for (jj=0;jj<size_of_constraint;jj++) {
2277                 for (ii=0;ii<primal_dofs;ii++) {
2278                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2279                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2280                   }
2281                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2282                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2283                   }
2284                 }
2285               }
2286             } else {
2287               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2288             }
2289           }
2290         } else { /* simple transformation block */
2291           PetscInt    row,col;
2292           PetscScalar val,norm;
2293 
2294           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2295           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2296           for (j=0;j<size_of_constraint;j++) {
2297             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2298             if (!PetscBTLookup(is_primal,row)) {
2299               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2300               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2301               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2302             } else {
2303               for (k=0;k<size_of_constraint;k++) {
2304                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2305                 if (row != col) {
2306                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2307                 } else {
2308                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2309                 }
2310                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2311               }
2312             }
2313           }
2314           if (pcbddc->dbg_flag) {
2315             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2316           }
2317         }
2318         /* increment primal counter */
2319         primal_counter += primal_dofs;
2320       } else {
2321         if (pcbddc->dbg_flag) {
2322           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2323         }
2324       }
2325       /* increment constraint counter total_counts */
2326       total_counts += primal_dofs;
2327     }
2328 
2329     /* free workspace */
2330     if (qr_needed) {
2331       if (pcbddc->dbg_flag) {
2332         ierr = PetscFree(work);CHKERRQ(ierr);
2333       }
2334       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2335       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2336       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2337       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2338       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2339     }
2340     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2341     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343 
2344     /* assembling of global change of variable */
2345     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2346     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2347     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2348     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2349     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2350     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2351 
2352     /* nonzeros (overestimated) */
2353     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2354     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2355     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2356     for (i=0;i<pcis->n;i++) {
2357       nnz_array[i] = 1.0;
2358       idxs_all[i] = i;
2359     }
2360     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2361     for (i=0;i<pcis->n_B;i++) {
2362       nnz_array[idxs_B[i]] = nnz[i];
2363     }
2364     if (pcis->n) {
2365       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2366     }
2367     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2368     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2369     ierr = PetscFree(nnz);CHKERRQ(ierr);
2370     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2371     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2372     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2373     for (i=0;i<local_size;i++) {
2374       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2375       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2376     }
2377     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2378     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2379     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2380     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2381 
2382     /* Set identity on dirichlet dofs */
2383     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2384     for (i=0;i<pcis->n-pcis->n_B;i++) {
2385       PetscScalar one=1.0;
2386       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2387     }
2388     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2389 
2390     /* Set values at interface dofs */
2391     done = PETSC_TRUE;
2392     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2393     if (!done) {
2394       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2395     }
2396     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2397     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2398     ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2399     for (i=0;i<nvtxs;i++) {
2400       PetscInt    row,*cols,ncols;
2401       PetscScalar *mat_vals;
2402 
2403       row = idxs_B[i];
2404       ncols = xadj[i+1]-xadj[i];
2405       cols = idxs_mapped+xadj[i];
2406       mat_vals = vals+xadj[i];
2407       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2408     }
2409     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2410     if (!done) {
2411       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2412     }
2413     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2414     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2415     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2416     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2417     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2418 
2419     /* check */
2420     if (pcbddc->dbg_flag) {
2421       PetscReal error;
2422       Vec       x,x_change;
2423 
2424       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2425       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2426       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2427       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2428       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2429       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2430       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2431       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2432       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2433       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2434       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2435       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2436       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2437       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2438       ierr = VecDestroy(&x);CHKERRQ(ierr);
2439       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2440     }
2441     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2442   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2443     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2444     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2445   }
2446 
2447   /* set up change of basis context */
2448   if (pcbddc->ChangeOfBasisMatrix) {
2449     PCBDDCChange_ctx change_ctx;
2450 
2451     if (!pcbddc->new_global_mat) {
2452       PetscInt global_size,local_size;
2453 
2454       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2455       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2456       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2457       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2458       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2459       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2460       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2461       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2462       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2463     } else {
2464       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2465       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2466       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2467     }
2468     if (!pcbddc->user_ChangeOfBasisMatrix) {
2469       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2470       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2471     } else {
2472       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2473       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2474     }
2475     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2476     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2477     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2478     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2479   }
2480 
2481   /* get indices in local ordering for vertices and constraints */
2482   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2483     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2484     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2485   }
2486   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2487   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2488   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2489   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2490   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2491   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2492   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2493   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2494   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2495   /* set quantities in PCBDDC data struct */
2496   pcbddc->n_actual_vertices = i;
2497   /* check if a new primal space has been introduced */
2498   pcbddc->new_primal_space_local = PETSC_TRUE;
2499   if (olocal_primal_size == pcbddc->local_primal_size) {
2500     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2501     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2502     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2503   }
2504   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2505   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2506 
2507   /* flush dbg viewer */
2508   if (pcbddc->dbg_flag) {
2509     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2510   }
2511 
2512   /* free workspace */
2513   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2514   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2515   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2516   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2517   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2518   ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2524 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2525 {
2526   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2527   PC_IS       *pcis = (PC_IS*)pc->data;
2528   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2529   PetscInt    ierr,i,vertex_size;
2530   PetscViewer viewer=pcbddc->dbg_viewer;
2531 
2532   PetscFunctionBegin;
2533   /* Reset previously computed graph */
2534   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2535   /* Init local Graph struct */
2536   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2537 
2538   /* Check validity of the csr graph passed in by the user */
2539   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2540     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2541   }
2542 
2543   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2544   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2545     Mat       mat_adj;
2546     PetscInt  *xadj,*adjncy;
2547     PetscInt  nvtxs;
2548     PetscBool flg_row=PETSC_TRUE;
2549 
2550     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2551     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2552     if (!flg_row) {
2553       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2554     }
2555     if (pcbddc->use_local_adj) {
2556       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2557       pcbddc->deluxe_compute_rowadj = PETSC_FALSE;
2558     } else { /* just compute subdomain's connected components */
2559       IS                     is_dummy;
2560       ISLocalToGlobalMapping l2gmap_dummy;
2561       PetscInt               j,sum;
2562       PetscInt               *cxadj,*cadjncy;
2563       const PetscInt         *idxs;
2564       PCBDDCGraph            graph;
2565       PetscBT                is_on_boundary;
2566 
2567       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2568       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2569       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2570       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2571       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2572       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2573       graph->xadj = xadj;
2574       graph->adjncy = adjncy;
2575       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2576       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2577 
2578       if (pcbddc->dbg_flag) {
2579         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2580         for (i=0;i<graph->ncc;i++) {
2581           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2582         }
2583         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2584       }
2585 
2586       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2587       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2588       for (i=0;i<pcis->n_B;i++) {
2589         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2590       }
2591       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2592 
2593       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2594       sum = 0;
2595       for (i=0;i<graph->ncc;i++) {
2596         PetscInt sizecc = 0;
2597         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2598           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2599             sizecc++;
2600           }
2601         }
2602         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2603           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2604             cxadj[graph->queue[j]] = sizecc;
2605           }
2606         }
2607         sum += sizecc*sizecc;
2608       }
2609       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2610       sum = 0;
2611       for (i=0;i<nvtxs;i++) {
2612         PetscInt temp = cxadj[i];
2613         cxadj[i] = sum;
2614         sum += temp;
2615       }
2616       cxadj[nvtxs] = sum;
2617       for (i=0;i<graph->ncc;i++) {
2618         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2619           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2620             PetscInt k,sizecc = 0;
2621             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2622               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2623                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2624                 sizecc++;
2625               }
2626             }
2627           }
2628         }
2629       }
2630       if (nvtxs) {
2631         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2632       } else {
2633         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2634         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2635       }
2636       graph->xadj = 0;
2637       graph->adjncy = 0;
2638       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2639       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2640     }
2641     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2642     if (!flg_row) {
2643       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2644     }
2645     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2646   }
2647 
2648   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2649   vertex_size = 1;
2650   if (pcbddc->user_provided_isfordofs) {
2651     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2652       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2653       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2654         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2655         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2656       }
2657       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2658       pcbddc->n_ISForDofs = 0;
2659       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2660     }
2661     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2662     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2663   } else {
2664     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2665       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2666       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2667       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2668         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2669       }
2670     }
2671   }
2672 
2673   /* Setup of Graph */
2674   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2675     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2676   }
2677   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2678     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2679   }
2680   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2681 
2682   /* Graph's connected components analysis */
2683   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2684 
2685   /* print some info to stdout */
2686   if (pcbddc->dbg_flag) {
2687     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2688   }
2689 
2690   /* mark topography has done */
2691   pcbddc->recompute_topography = PETSC_FALSE;
2692   PetscFunctionReturn(0);
2693 }
2694 
2695 #undef __FUNCT__
2696 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2697 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2698 {
2699   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2700   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2701   PetscErrorCode ierr;
2702 
2703   PetscFunctionBegin;
2704   n = 0;
2705   vertices = 0;
2706   if (pcbddc->ConstraintMatrix) {
2707     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2708     for (i=0;i<local_primal_size;i++) {
2709       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2710       if (size_of_constraint == 1) n++;
2711       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2712     }
2713     if (vertices_idx) {
2714       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2715       n = 0;
2716       for (i=0;i<local_primal_size;i++) {
2717         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2718         if (size_of_constraint == 1) {
2719           vertices[n++]=row_cmat_indices[0];
2720         }
2721         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2722       }
2723     }
2724   }
2725   *n_vertices = n;
2726   if (vertices_idx) *vertices_idx = vertices;
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2732 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2733 {
2734   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2735   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2736   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2737   PetscBT        touched;
2738   PetscErrorCode ierr;
2739 
2740     /* This function assumes that the number of local constraints per connected component
2741        is not greater than the number of nodes defined for the connected component
2742        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2743   PetscFunctionBegin;
2744   n = 0;
2745   constraints_index = 0;
2746   if (pcbddc->ConstraintMatrix) {
2747     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2748     max_size_of_constraint = 0;
2749     for (i=0;i<local_primal_size;i++) {
2750       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2751       if (size_of_constraint > 1) {
2752         n++;
2753       }
2754       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2755       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2756     }
2757     if (constraints_idx) {
2758       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2759       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2760       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2761       n = 0;
2762       for (i=0;i<local_primal_size;i++) {
2763         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2764         if (size_of_constraint > 1) {
2765           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2766           /* find first untouched local node */
2767           j = 0;
2768           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2769           min_index = row_cmat_global_indices[j];
2770           min_loc = j;
2771           /* search the minimum among nodes not yet touched on the connected component
2772              since there can be more than one constraint on a single cc */
2773           for (j=1;j<size_of_constraint;j++) {
2774             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2775               min_index = row_cmat_global_indices[j];
2776               min_loc = j;
2777             }
2778           }
2779           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2780           constraints_index[n++] = row_cmat_indices[min_loc];
2781         }
2782         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2783       }
2784       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2785       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2786     }
2787   }
2788   *n_constraints = n;
2789   if (constraints_idx) *constraints_idx = constraints_index;
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 #undef __FUNCT__
2794 #define __FUNCT__ "PCBDDCSubsetNumbering"
2795 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2796 {
2797   Vec            local_vec,global_vec;
2798   IS             seqis,paris;
2799   VecScatter     scatter_ctx;
2800   PetscScalar    *array;
2801   PetscInt       *temp_global_dofs;
2802   PetscScalar    globalsum;
2803   PetscInt       i,j,s;
2804   PetscInt       nlocals,first_index,old_index,max_local;
2805   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2806   PetscMPIInt    *dof_sizes,*dof_displs;
2807   PetscBool      first_found;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   /* mpi buffers */
2812   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2813   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2814   j = ( !rank_prec_comm ? size_prec_comm : 0);
2815   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2816   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2817   /* get maximum size of subset */
2818   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2819   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2820   max_local = 0;
2821   for (i=0;i<n_local_dofs;i++) {
2822     if (max_local < temp_global_dofs[i] ) {
2823       max_local = temp_global_dofs[i];
2824     }
2825   }
2826   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2827   max_global++;
2828   max_local = 0;
2829   for (i=0;i<n_local_dofs;i++) {
2830     if (max_local < local_dofs[i] ) {
2831       max_local = local_dofs[i];
2832     }
2833   }
2834   max_local++;
2835   /* allocate workspace */
2836   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2837   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2838   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2839   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2840   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2841   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2842   /* create scatter */
2843   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2844   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2845   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2846   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2847   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2848   /* init array */
2849   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2850   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2851   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2852   if (local_dofs_mult) {
2853     for (i=0;i<n_local_dofs;i++) {
2854       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2855     }
2856   } else {
2857     for (i=0;i<n_local_dofs;i++) {
2858       array[local_dofs[i]]=1.0;
2859     }
2860   }
2861   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2862   /* scatter into global vec and get total number of global dofs */
2863   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2864   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2865   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2866   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2867   /* Fill global_vec with cumulative function for global numbering */
2868   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2869   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2870   nlocals = 0;
2871   first_index = -1;
2872   first_found = PETSC_FALSE;
2873   for (i=0;i<s;i++) {
2874     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2875       first_found = PETSC_TRUE;
2876       first_index = i;
2877     }
2878     nlocals += (PetscInt)PetscRealPart(array[i]);
2879   }
2880   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2881   if (!rank_prec_comm) {
2882     dof_displs[0]=0;
2883     for (i=1;i<size_prec_comm;i++) {
2884       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2885     }
2886   }
2887   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2888   if (first_found) {
2889     array[first_index] += (PetscScalar)nlocals;
2890     old_index = first_index;
2891     for (i=first_index+1;i<s;i++) {
2892       if (PetscRealPart(array[i]) > 0.1) {
2893         array[i] += array[old_index];
2894         old_index = i;
2895       }
2896     }
2897   }
2898   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2899   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2900   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2901   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2902   /* get global ordering of local dofs */
2903   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2904   if (local_dofs_mult) {
2905     for (i=0;i<n_local_dofs;i++) {
2906       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2907     }
2908   } else {
2909     for (i=0;i<n_local_dofs;i++) {
2910       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2911     }
2912   }
2913   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2914   /* free workspace */
2915   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2916   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2917   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2918   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2919   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2920   /* return pointer to global ordering of local dofs */
2921   *global_numbering_subset = temp_global_dofs;
2922   PetscFunctionReturn(0);
2923 }
2924 
2925 #undef __FUNCT__
2926 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2927 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2928 {
2929   PetscInt       i,j;
2930   PetscScalar    *alphas;
2931   PetscErrorCode ierr;
2932 
2933   PetscFunctionBegin;
2934   /* this implements stabilized Gram-Schmidt */
2935   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2936   for (i=0;i<n;i++) {
2937     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2938     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2939     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2940   }
2941   ierr = PetscFree(alphas);CHKERRQ(ierr);
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #undef __FUNCT__
2946 #define __FUNCT__ "MatISGetSubassemblingPattern"
2947 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2948 {
2949   Mat             subdomain_adj;
2950   IS              new_ranks,ranks_send_to;
2951   MatPartitioning partitioner;
2952   Mat_IS          *matis;
2953   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2954   PetscInt        prank;
2955   PetscMPIInt     size,rank,color;
2956   PetscInt        *xadj,*adjncy,*oldranks;
2957   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2958   PetscInt        i,local_size,threshold=0;
2959   PetscErrorCode  ierr;
2960   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2961   PetscSubcomm    subcomm;
2962 
2963   PetscFunctionBegin;
2964   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2965   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2966   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2967 
2968   /* Get info on mapping */
2969   matis = (Mat_IS*)(mat->data);
2970   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2971   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2972 
2973   /* build local CSR graph of subdomains' connectivity */
2974   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2975   xadj[0] = 0;
2976   xadj[1] = PetscMax(n_neighs-1,0);
2977   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2978   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2979 
2980   if (threshold) {
2981     PetscInt xadj_count = 0;
2982     for (i=1;i<n_neighs;i++) {
2983       if (n_shared[i] > threshold) {
2984         adjncy[xadj_count] = neighs[i];
2985         adjncy_wgt[xadj_count] = n_shared[i];
2986         xadj_count++;
2987       }
2988     }
2989     xadj[1] = xadj_count;
2990   } else {
2991     if (xadj[1]) {
2992       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2993       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2994     }
2995   }
2996   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2997   if (use_square) {
2998     for (i=0;i<xadj[1];i++) {
2999       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3000     }
3001   }
3002   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3003 
3004   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3005 
3006   /*
3007     Restrict work on active processes only.
3008   */
3009   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3010   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3011   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3012   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3013   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3014   if (color) {
3015     ierr = PetscFree(xadj);CHKERRQ(ierr);
3016     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3017     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3018   } else {
3019     PetscInt coarsening_ratio;
3020     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3021     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3022     prank = rank;
3023     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3024     /*
3025     for (i=0;i<size;i++) {
3026       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3027     }
3028     */
3029     for (i=0;i<xadj[1];i++) {
3030       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3031     }
3032     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3033     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3034     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3035 
3036     /* Partition */
3037     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3038     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3039     if (use_vwgt) {
3040       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3041       v_wgt[0] = local_size;
3042       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3043     }
3044     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3045     coarsening_ratio = size/n_subdomains;
3046     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3047     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3048     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3049     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3050 
3051     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3052     if (contiguous) {
3053       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3054     } else {
3055       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3056     }
3057     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3058     /* clean up */
3059     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3060     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3061     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3062     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3063   }
3064   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3065 
3066   /* assemble parallel IS for sends */
3067   i = 1;
3068   if (color) i=0;
3069   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3070 
3071   /* get back IS */
3072   *is_sends = ranks_send_to;
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatISSubassemble"
3080 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3081 {
3082   Mat                    local_mat;
3083   Mat_IS                 *matis;
3084   IS                     is_sends_internal;
3085   PetscInt               rows,cols,new_local_rows;
3086   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3087   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3088   ISLocalToGlobalMapping l2gmap;
3089   PetscInt*              l2gmap_indices;
3090   const PetscInt*        is_indices;
3091   MatType                new_local_type;
3092   /* buffers */
3093   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3094   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3095   PetscInt               *recv_buffer_idxs_local;
3096   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3097   /* MPI */
3098   MPI_Comm               comm,comm_n;
3099   PetscSubcomm           subcomm;
3100   PetscMPIInt            n_sends,n_recvs,commsize;
3101   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3102   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3103   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3104   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3105   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3106   PetscErrorCode         ierr;
3107 
3108   PetscFunctionBegin;
3109   /* TODO: add missing checks */
3110   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3111   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3112   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3113   PetscValidLogicalCollectiveInt(mat,nis,7);
3114   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3115   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3116   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3117   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3118   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3119   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3120   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3121   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3122     PetscInt mrows,mcols,mnrows,mncols;
3123     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3124     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3125     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3126     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3127     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3128     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3129   }
3130   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3131   PetscValidLogicalCollectiveInt(mat,bs,0);
3132   /* prepare IS for sending if not provided */
3133   if (!is_sends) {
3134     PetscBool pcontig = PETSC_TRUE;
3135     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3136     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3137   } else {
3138     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3139     is_sends_internal = is_sends;
3140   }
3141 
3142   /* get pointer of MATIS data */
3143   matis = (Mat_IS*)mat->data;
3144 
3145   /* get comm */
3146   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3147 
3148   /* compute number of sends */
3149   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3150   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3151 
3152   /* compute number of receives */
3153   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3154   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3155   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3156   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3157   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3158   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3159   ierr = PetscFree(iflags);CHKERRQ(ierr);
3160 
3161   /* restrict comm if requested */
3162   subcomm = 0;
3163   destroy_mat = PETSC_FALSE;
3164   if (restrict_comm) {
3165     PetscMPIInt color,rank,subcommsize;
3166     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3167     color = 0;
3168     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3169     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3170     subcommsize = commsize - subcommsize;
3171     /* check if reuse has been requested */
3172     if (reuse == MAT_REUSE_MATRIX) {
3173       if (*mat_n) {
3174         PetscMPIInt subcommsize2;
3175         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3176         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3177         comm_n = PetscObjectComm((PetscObject)*mat_n);
3178       } else {
3179         comm_n = PETSC_COMM_SELF;
3180       }
3181     } else { /* MAT_INITIAL_MATRIX */
3182       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3183       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3184       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3185       comm_n = subcomm->comm;
3186     }
3187     /* flag to destroy *mat_n if not significative */
3188     if (color) destroy_mat = PETSC_TRUE;
3189   } else {
3190     comm_n = comm;
3191   }
3192 
3193   /* prepare send/receive buffers */
3194   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3195   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3196   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3197   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3198   if (nis) {
3199     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3200   }
3201 
3202   /* Get data from local matrices */
3203   if (!isdense) {
3204     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3205     /* TODO: See below some guidelines on how to prepare the local buffers */
3206     /*
3207        send_buffer_vals should contain the raw values of the local matrix
3208        send_buffer_idxs should contain:
3209        - MatType_PRIVATE type
3210        - PetscInt        size_of_l2gmap
3211        - PetscInt        global_row_indices[size_of_l2gmap]
3212        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3213     */
3214   } else {
3215     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3216     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3217     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3218     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3219     send_buffer_idxs[1] = i;
3220     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3221     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3222     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3223     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3224     for (i=0;i<n_sends;i++) {
3225       ilengths_vals[is_indices[i]] = len*len;
3226       ilengths_idxs[is_indices[i]] = len+2;
3227     }
3228   }
3229   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3230   /* additional is (if any) */
3231   if (nis) {
3232     PetscMPIInt psum;
3233     PetscInt j;
3234     for (j=0,psum=0;j<nis;j++) {
3235       PetscInt plen;
3236       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3237       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3238       psum += len+1; /* indices + lenght */
3239     }
3240     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3241     for (j=0,psum=0;j<nis;j++) {
3242       PetscInt plen;
3243       const PetscInt *is_array_idxs;
3244       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3245       send_buffer_idxs_is[psum] = plen;
3246       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3247       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3248       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3249       psum += plen+1; /* indices + lenght */
3250     }
3251     for (i=0;i<n_sends;i++) {
3252       ilengths_idxs_is[is_indices[i]] = psum;
3253     }
3254     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3255   }
3256 
3257   buf_size_idxs = 0;
3258   buf_size_vals = 0;
3259   buf_size_idxs_is = 0;
3260   for (i=0;i<n_recvs;i++) {
3261     buf_size_idxs += (PetscInt)olengths_idxs[i];
3262     buf_size_vals += (PetscInt)olengths_vals[i];
3263     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3264   }
3265   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3266   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3267   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3268 
3269   /* get new tags for clean communications */
3270   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3271   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3272   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3273 
3274   /* allocate for requests */
3275   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3276   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3277   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3278   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3279   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3280   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3281 
3282   /* communications */
3283   ptr_idxs = recv_buffer_idxs;
3284   ptr_vals = recv_buffer_vals;
3285   ptr_idxs_is = recv_buffer_idxs_is;
3286   for (i=0;i<n_recvs;i++) {
3287     source_dest = onodes[i];
3288     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3289     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3290     ptr_idxs += olengths_idxs[i];
3291     ptr_vals += olengths_vals[i];
3292     if (nis) {
3293       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3294       ptr_idxs_is += olengths_idxs_is[i];
3295     }
3296   }
3297   for (i=0;i<n_sends;i++) {
3298     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3299     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3300     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3301     if (nis) {
3302       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3303     }
3304   }
3305   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3306   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3307 
3308   /* assemble new l2g map */
3309   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3310   ptr_idxs = recv_buffer_idxs;
3311   new_local_rows = 0;
3312   for (i=0;i<n_recvs;i++) {
3313     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3314     ptr_idxs += olengths_idxs[i];
3315   }
3316   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3317   ptr_idxs = recv_buffer_idxs;
3318   new_local_rows = 0;
3319   for (i=0;i<n_recvs;i++) {
3320     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3321     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3322     ptr_idxs += olengths_idxs[i];
3323   }
3324   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3325   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3326   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3327 
3328   /* infer new local matrix type from received local matrices type */
3329   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3330   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3331   if (n_recvs) {
3332     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3333     ptr_idxs = recv_buffer_idxs;
3334     for (i=0;i<n_recvs;i++) {
3335       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3336         new_local_type_private = MATAIJ_PRIVATE;
3337         break;
3338       }
3339       ptr_idxs += olengths_idxs[i];
3340     }
3341     switch (new_local_type_private) {
3342       case MATDENSE_PRIVATE:
3343         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3344           new_local_type = MATSEQAIJ;
3345           bs = 1;
3346         } else { /* if I receive only 1 dense matrix */
3347           new_local_type = MATSEQDENSE;
3348           bs = 1;
3349         }
3350         break;
3351       case MATAIJ_PRIVATE:
3352         new_local_type = MATSEQAIJ;
3353         bs = 1;
3354         break;
3355       case MATBAIJ_PRIVATE:
3356         new_local_type = MATSEQBAIJ;
3357         break;
3358       case MATSBAIJ_PRIVATE:
3359         new_local_type = MATSEQSBAIJ;
3360         break;
3361       default:
3362         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3363         break;
3364     }
3365   } else { /* by default, new_local_type is seqdense */
3366     new_local_type = MATSEQDENSE;
3367     bs = 1;
3368   }
3369 
3370   /* create MATIS object if needed */
3371   if (reuse == MAT_INITIAL_MATRIX) {
3372     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3373     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3374   } else {
3375     /* it also destroys the local matrices */
3376     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3377   }
3378   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3379   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3380 
3381   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3382 
3383   /* Global to local map of received indices */
3384   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3385   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3386   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3387 
3388   /* restore attributes -> type of incoming data and its size */
3389   buf_size_idxs = 0;
3390   for (i=0;i<n_recvs;i++) {
3391     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3392     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3393     buf_size_idxs += (PetscInt)olengths_idxs[i];
3394   }
3395   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3396 
3397   /* set preallocation */
3398   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3399   if (!newisdense) {
3400     PetscInt *new_local_nnz=0;
3401 
3402     ptr_vals = recv_buffer_vals;
3403     ptr_idxs = recv_buffer_idxs_local;
3404     if (n_recvs) {
3405       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3406     }
3407     for (i=0;i<n_recvs;i++) {
3408       PetscInt j;
3409       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3410         for (j=0;j<*(ptr_idxs+1);j++) {
3411           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3412         }
3413       } else {
3414         /* TODO */
3415       }
3416       ptr_idxs += olengths_idxs[i];
3417     }
3418     if (new_local_nnz) {
3419       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3420       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3421       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3422       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3423       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3424       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3425     } else {
3426       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3427     }
3428     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3429   } else {
3430     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3431   }
3432 
3433   /* set values */
3434   ptr_vals = recv_buffer_vals;
3435   ptr_idxs = recv_buffer_idxs_local;
3436   for (i=0;i<n_recvs;i++) {
3437     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3438       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3439       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3440       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3441       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3442       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3443     } else {
3444       /* TODO */
3445     }
3446     ptr_idxs += olengths_idxs[i];
3447     ptr_vals += olengths_vals[i];
3448   }
3449   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3450   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3451   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3452   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3453   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3454   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3455 
3456 #if 0
3457   if (!restrict_comm) { /* check */
3458     Vec       lvec,rvec;
3459     PetscReal infty_error;
3460 
3461     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3462     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3463     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3464     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3465     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3466     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3467     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3468     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3469     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3470   }
3471 #endif
3472 
3473   /* assemble new additional is (if any) */
3474   if (nis) {
3475     PetscInt **temp_idxs,*count_is,j,psum;
3476 
3477     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3478     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3479     ptr_idxs = recv_buffer_idxs_is;
3480     psum = 0;
3481     for (i=0;i<n_recvs;i++) {
3482       for (j=0;j<nis;j++) {
3483         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3484         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3485         psum += plen;
3486         ptr_idxs += plen+1; /* shift pointer to received data */
3487       }
3488     }
3489     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3490     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3491     for (i=1;i<nis;i++) {
3492       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3493     }
3494     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3495     ptr_idxs = recv_buffer_idxs_is;
3496     for (i=0;i<n_recvs;i++) {
3497       for (j=0;j<nis;j++) {
3498         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3499         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3500         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3501         ptr_idxs += plen+1; /* shift pointer to received data */
3502       }
3503     }
3504     for (i=0;i<nis;i++) {
3505       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3506       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3507       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(count_is);CHKERRQ(ierr);
3510     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3511     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3512   }
3513   /* free workspace */
3514   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3515   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3516   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3517   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3518   if (isdense) {
3519     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3520     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3521   } else {
3522     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3523   }
3524   if (nis) {
3525     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3526     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3527   }
3528   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3529   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3530   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3531   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3532   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3533   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3534   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3535   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3536   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3537   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3538   ierr = PetscFree(onodes);CHKERRQ(ierr);
3539   if (nis) {
3540     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3541     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3542     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3543   }
3544   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3545   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3546     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3547     for (i=0;i<nis;i++) {
3548       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3549     }
3550   }
3551   PetscFunctionReturn(0);
3552 }
3553 
3554 /* temporary hack into ksp private data structure */
3555 #include <petsc-private/kspimpl.h>
3556 
3557 #undef __FUNCT__
3558 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3559 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3560 {
3561   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3562   PC_IS                  *pcis = (PC_IS*)pc->data;
3563   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3564   MatNullSpace           CoarseNullSpace=NULL;
3565   ISLocalToGlobalMapping coarse_islg;
3566   IS                     coarse_is,*isarray;
3567   PetscInt               i,im_active=-1,active_procs=-1;
3568   PetscInt               nis,nisdofs,nisneu;
3569   PC                     pc_temp;
3570   PCType                 coarse_pc_type;
3571   KSPType                coarse_ksp_type;
3572   PetscBool              multilevel_requested,multilevel_allowed;
3573   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3574   Mat                    t_coarse_mat_is;
3575   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3576   PetscMPIInt            all_procs;
3577   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3578   PetscBool              compute_vecs = PETSC_FALSE;
3579   PetscScalar            *array;
3580   PetscErrorCode         ierr;
3581 
3582   PetscFunctionBegin;
3583   /* Assign global numbering to coarse dofs */
3584   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3585     compute_vecs = PETSC_TRUE;
3586     PetscInt ocoarse_size;
3587     ocoarse_size = pcbddc->coarse_size;
3588     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3589     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3590     /* see if we can avoid some work */
3591     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3592       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3593         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3594         coarse_reuse = PETSC_FALSE;
3595       } else { /* we can safely reuse already computed coarse matrix */
3596         coarse_reuse = PETSC_TRUE;
3597       }
3598     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3599       coarse_reuse = PETSC_FALSE;
3600     }
3601     /* reset any subassembling information */
3602     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3603     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3604   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3605     coarse_reuse = PETSC_TRUE;
3606   }
3607 
3608   /* count "active" (i.e. with positive local size) and "void" processes */
3609   im_active = !!(pcis->n);
3610   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3611   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3612   void_procs = all_procs-active_procs;
3613   csin_type_simple = PETSC_TRUE;
3614   redist = PETSC_FALSE;
3615   if (pcbddc->current_level && void_procs) {
3616     csin_ml = PETSC_TRUE;
3617     ncoarse_ml = void_procs;
3618     csin_ds = PETSC_TRUE;
3619     ncoarse_ds = void_procs;
3620   } else {
3621     csin_ml = PETSC_FALSE;
3622     ncoarse_ml = all_procs;
3623     if (void_procs) {
3624       csin_ds = PETSC_TRUE;
3625       ncoarse_ds = void_procs;
3626       csin_type_simple = PETSC_FALSE;
3627     } else {
3628       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3629         csin_ds = PETSC_TRUE;
3630         ncoarse_ds = pcbddc->redistribute_coarse;
3631         redist = PETSC_TRUE;
3632       } else {
3633         csin_ds = PETSC_FALSE;
3634         ncoarse_ds = all_procs;
3635       }
3636     }
3637   }
3638 
3639   /*
3640     test if we can go multilevel: three conditions must be satisfied:
3641     - we have not exceeded the number of levels requested
3642     - we can actually subassemble the active processes
3643     - we can find a suitable number of MPI processes where we can place the subassembled problem
3644   */
3645   multilevel_allowed = PETSC_FALSE;
3646   multilevel_requested = PETSC_FALSE;
3647   if (pcbddc->current_level < pcbddc->max_levels) {
3648     multilevel_requested = PETSC_TRUE;
3649     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3650       multilevel_allowed = PETSC_FALSE;
3651     } else {
3652       multilevel_allowed = PETSC_TRUE;
3653     }
3654   }
3655   /* determine number of process partecipating to coarse solver */
3656   if (multilevel_allowed) {
3657     ncoarse = ncoarse_ml;
3658     csin = csin_ml;
3659   } else {
3660     ncoarse = ncoarse_ds;
3661     csin = csin_ds;
3662   }
3663 
3664   /* creates temporary l2gmap and IS for coarse indexes */
3665   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3666   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3667 
3668   /* creates temporary MATIS object for coarse matrix */
3669   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3670   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3671   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3672   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3673 #if 0
3674   {
3675     PetscViewer viewer;
3676     char filename[256];
3677     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3678     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3679     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3680     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3681     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3682   }
3683 #endif
3684   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3685   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3686   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3689 
3690   /* compute dofs splitting and neumann boundaries for coarse dofs */
3691   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3692     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3693     const PetscInt         *idxs;
3694     ISLocalToGlobalMapping tmap;
3695 
3696     /* create map between primal indices (in local representative ordering) and local primal numbering */
3697     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3698     /* allocate space for temporary storage */
3699     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3700     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3701     /* allocate for IS array */
3702     nisdofs = pcbddc->n_ISForDofsLocal;
3703     nisneu = !!pcbddc->NeumannBoundariesLocal;
3704     nis = nisdofs + nisneu;
3705     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3706     /* dofs splitting */
3707     for (i=0;i<nisdofs;i++) {
3708       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3709       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3710       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3711       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3712       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3713       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3714       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3715       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3716     }
3717     /* neumann boundaries */
3718     if (pcbddc->NeumannBoundariesLocal) {
3719       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3720       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3721       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3722       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3723       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3724       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3725       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3726       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3727     }
3728     /* free memory */
3729     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3730     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3731     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3732   } else {
3733     nis = 0;
3734     nisdofs = 0;
3735     nisneu = 0;
3736     isarray = NULL;
3737   }
3738   /* destroy no longer needed map */
3739   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3740 
3741   /* restrict on coarse candidates (if needed) */
3742   coarse_mat_is = NULL;
3743   if (csin) {
3744     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3745       if (redist) {
3746         PetscMPIInt rank;
3747         PetscInt spc,n_spc_p1,dest[1];
3748 
3749         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3750         spc = all_procs/pcbddc->redistribute_coarse;
3751         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
3752         if (rank > n_spc_p1*(spc+1)-1) {
3753           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3754         } else {
3755           dest[0] = rank/(spc+1);
3756         }
3757         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3758       } else {
3759         PetscInt j,tissize,*nisindices;
3760         PetscInt *coarse_candidates;
3761         const PetscInt* tisindices;
3762         /* get coarse candidates' ranks in pc communicator */
3763         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3764         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3765         for (i=0,j=0;i<all_procs;i++) {
3766           if (!coarse_candidates[i]) {
3767             coarse_candidates[j]=i;
3768             j++;
3769           }
3770         }
3771         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3772         /* get a suitable subassembling pattern */
3773         if (csin_type_simple) {
3774           PetscMPIInt rank;
3775           PetscInt    issize,isidx;
3776           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3777           if (im_active) {
3778             issize = 1;
3779             isidx = (PetscInt)rank;
3780           } else {
3781             issize = 0;
3782             isidx = -1;
3783           }
3784           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3785         } else {
3786           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3787         }
3788         if (pcbddc->dbg_flag) {
3789           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3790           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3791           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3792           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3793           for (i=0;i<j;i++) {
3794             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3795           }
3796           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3797           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3798         }
3799         /* shift the pattern on coarse candidates */
3800         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3801         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3802         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
3803         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3804         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3805         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3806         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3807       }
3808     }
3809     if (pcbddc->dbg_flag) {
3810       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3811       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3812       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3813       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3814     }
3815     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3816     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3817   } else {
3818     if (pcbddc->dbg_flag) {
3819       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3820       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3821       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3822     }
3823     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3824     coarse_mat_is = t_coarse_mat_is;
3825   }
3826 
3827   /* create local to global scatters for coarse problem */
3828   if (compute_vecs) {
3829     PetscInt lrows;
3830     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3831     if (coarse_mat_is) {
3832       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3833     } else {
3834       lrows = 0;
3835     }
3836     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3837     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3838     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3839     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3840     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3841   }
3842   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3843   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3844 
3845   /* set defaults for coarse KSP and PC */
3846   if (multilevel_allowed) {
3847     coarse_ksp_type = KSPRICHARDSON;
3848     coarse_pc_type = PCBDDC;
3849   } else {
3850     coarse_ksp_type = KSPPREONLY;
3851     coarse_pc_type = PCREDUNDANT;
3852   }
3853 
3854   /* print some info if requested */
3855   if (pcbddc->dbg_flag) {
3856     if (!multilevel_allowed) {
3857       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3858       if (multilevel_requested) {
3859         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3860       } else if (pcbddc->max_levels) {
3861         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3862       }
3863       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3864     }
3865   }
3866 
3867   /* create the coarse KSP object only once with defaults */
3868   if (coarse_mat_is) {
3869     MatReuse coarse_mat_reuse;
3870     PetscViewer dbg_viewer = NULL;
3871     if (pcbddc->dbg_flag) {
3872       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3873       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3874     }
3875     if (!pcbddc->coarse_ksp) {
3876       char prefix[256],str_level[16];
3877       size_t len;
3878       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3879       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3880       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3881       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3882       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3883       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3884       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3885       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3886       /* prefix */
3887       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3888       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3889       if (!pcbddc->current_level) {
3890         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3891         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3892       } else {
3893         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3894         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3895         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3896         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3897         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3898         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3899       }
3900       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3901       /* allow user customization */
3902       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3903       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3904     }
3905 
3906     /* get some info after set from options */
3907     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3908     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3909     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3910     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3911     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3912       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3913       isbddc = PETSC_FALSE;
3914     }
3915     if (isredundant) {
3916       KSP inner_ksp;
3917       PC inner_pc;
3918       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3919       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3920       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3921     }
3922 
3923     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3924     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3925     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3926     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3927     if (nisdofs) {
3928       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3929       for (i=0;i<nisdofs;i++) {
3930         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3931       }
3932     }
3933     if (nisneu) {
3934       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3935       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3936     }
3937 
3938     /* assemble coarse matrix */
3939     if (coarse_reuse) {
3940       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3941       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3942       coarse_mat_reuse = MAT_REUSE_MATRIX;
3943     } else {
3944       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3945     }
3946     if (isbddc || isnn) {
3947       if (pcbddc->coarsening_ratio > 1) {
3948         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3949           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3950           if (pcbddc->dbg_flag) {
3951             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3952             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3953             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3954             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3955           }
3956         }
3957         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3958       } else {
3959         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
3960         coarse_mat = coarse_mat_is;
3961       }
3962     } else {
3963       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3964     }
3965     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3966 
3967     /* propagate symmetry info to coarse matrix */
3968     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3969     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3970 
3971     /* set operators */
3972     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3973     if (pcbddc->dbg_flag) {
3974       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3975     }
3976   } else { /* processes non partecipating to coarse solver (if any) */
3977     coarse_mat = 0;
3978   }
3979   ierr = PetscFree(isarray);CHKERRQ(ierr);
3980 #if 0
3981   {
3982     PetscViewer viewer;
3983     char filename[256];
3984     sprintf(filename,"coarse_mat.m");
3985     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3986     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3987     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3988     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3989   }
3990 #endif
3991 
3992   /* Compute coarse null space (special handling by BDDC only) */
3993   if (pcbddc->NullSpace) {
3994     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3995   }
3996 
3997   if (pcbddc->coarse_ksp) {
3998     Vec crhs,csol;
3999     PetscBool ispreonly;
4000     if (CoarseNullSpace) {
4001       if (isbddc) {
4002         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4003       } else {
4004         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4005       }
4006     }
4007     /* setup coarse ksp */
4008     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4009     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4010     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4011     /* hack */
4012     if (!csol) {
4013       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4014     }
4015     if (!crhs) {
4016       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4017     }
4018     /* Check coarse problem if in debug mode or if solving with an iterative method */
4019     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4020     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4021       KSP       check_ksp;
4022       KSPType   check_ksp_type;
4023       PC        check_pc;
4024       Vec       check_vec,coarse_vec;
4025       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4026       PetscInt  its;
4027       PetscBool compute_eigs;
4028       PetscReal *eigs_r,*eigs_c;
4029       PetscInt  neigs;
4030       const char *prefix;
4031 
4032       /* Create ksp object suitable for estimation of extreme eigenvalues */
4033       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4034       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4035       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4036       if (ispreonly) {
4037         check_ksp_type = KSPPREONLY;
4038         compute_eigs = PETSC_FALSE;
4039       } else {
4040         check_ksp_type = KSPGMRES;
4041         compute_eigs = PETSC_TRUE;
4042       }
4043       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4044       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4045       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4046       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4047       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4048       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4049       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4050       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4051       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4052       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4053       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4054       /* create random vec */
4055       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4056       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4057       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4058       if (CoarseNullSpace) {
4059         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4060       }
4061       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4062       /* solve coarse problem */
4063       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4064       if (CoarseNullSpace) {
4065         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4066       }
4067       /* set eigenvalue estimation if preonly has not been requested */
4068       if (compute_eigs) {
4069         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4070         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4071         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4072         lambda_max = eigs_r[neigs-1];
4073         lambda_min = eigs_r[0];
4074         if (pcbddc->use_coarse_estimates) {
4075           if (lambda_max>lambda_min) {
4076             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4077             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4078           }
4079         }
4080       }
4081 
4082       /* check coarse problem residual error */
4083       if (pcbddc->dbg_flag) {
4084         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4085         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4086         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4087         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4088         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4089         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4090         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4091         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4092         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4093         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4094         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4095         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4096         if (compute_eigs) {
4097           PetscReal lambda_max_s,lambda_min_s;
4098           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4099           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4100           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4101           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4102           for (i=0;i<neigs;i++) {
4103             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4104           }
4105         }
4106         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4107         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4108       }
4109       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4110       if (compute_eigs) {
4111         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4112         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4113       }
4114     }
4115   }
4116   /* print additional info */
4117   if (pcbddc->dbg_flag) {
4118     /* waits until all processes reaches this point */
4119     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4120     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4121     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4122   }
4123 
4124   /* free memory */
4125   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4126   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4127   PetscFunctionReturn(0);
4128 }
4129 
4130 #undef __FUNCT__
4131 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4132 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4133 {
4134   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4135   PC_IS*         pcis = (PC_IS*)pc->data;
4136   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4137   PetscInt       i,coarse_size;
4138   PetscInt       *local_primal_indices;
4139   PetscErrorCode ierr;
4140 
4141   PetscFunctionBegin;
4142   /* Compute global number of coarse dofs */
4143   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4144     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4145   }
4146   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4147 
4148   /* check numbering */
4149   if (pcbddc->dbg_flag) {
4150     PetscScalar coarsesum,*array;
4151     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4152 
4153     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4154     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4155     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4156     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4157     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4158     for (i=0;i<pcbddc->local_primal_size;i++) {
4159       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4160     }
4161     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4162     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4163     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4164     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4165     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4166     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4167     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4168     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4169     for (i=0;i<pcis->n;i++) {
4170       if (array[i] == 1.0) {
4171         set_error = PETSC_TRUE;
4172         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4173       }
4174     }
4175     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4176     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4177     for (i=0;i<pcis->n;i++) {
4178       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4179     }
4180     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4181     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4182     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4183     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4184     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4185     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4186     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4187       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4188       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4189       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4190       for (i=0;i<pcbddc->local_primal_size;i++) {
4191         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4192       }
4193       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4194     }
4195     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4196     if (set_error_reduced) {
4197       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4198     }
4199   }
4200   /* get back data */
4201   *coarse_size_n = coarse_size;
4202   *local_primal_indices_n = local_primal_indices;
4203   PetscFunctionReturn(0);
4204 }
4205 
4206 #undef __FUNCT__
4207 #define __FUNCT__ "PCBDDCGlobalToLocal"
4208 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4209 {
4210   IS             localis_t;
4211   PetscInt       i,lsize,*idxs,n;
4212   PetscScalar    *vals;
4213   PetscErrorCode ierr;
4214 
4215   PetscFunctionBegin;
4216   /* get indices in local ordering exploiting local to global map */
4217   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4218   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4219   for (i=0;i<lsize;i++) vals[i] = 1.0;
4220   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4221   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4222   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4223   if (idxs) { /* multilevel guard */
4224     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4225   }
4226   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4227   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4228   ierr = PetscFree(vals);CHKERRQ(ierr);
4229   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4230   /* now compute set in local ordering */
4231   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4232   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4233   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4234   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4235   for (i=0,lsize=0;i<n;i++) {
4236     if (PetscRealPart(vals[i]) > 0.5) {
4237       lsize++;
4238     }
4239   }
4240   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4241   for (i=0,lsize=0;i<n;i++) {
4242     if (PetscRealPart(vals[i]) > 0.5) {
4243       idxs[lsize++] = i;
4244     }
4245   }
4246   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4247   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4248   *localis = localis_t;
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4253 #undef __FUNCT__
4254 #define __FUNCT__ "PCBDDCMatMult_Private"
4255 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4256 {
4257   PCBDDCChange_ctx change_ctx;
4258   PetscErrorCode   ierr;
4259 
4260   PetscFunctionBegin;
4261   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4262   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4263   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4264   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4265   PetscFunctionReturn(0);
4266 }
4267 
4268 #undef __FUNCT__
4269 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4270 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4271 {
4272   PCBDDCChange_ctx change_ctx;
4273   PetscErrorCode   ierr;
4274 
4275   PetscFunctionBegin;
4276   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4277   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4278   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4279   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4280   PetscFunctionReturn(0);
4281 }
4282