xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 2e1e5fa4b2a842682a2720c91064d65d1cccd2a0)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
111   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
112   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
119 {
120   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121   PC_IS          *pcis = (PC_IS*)pc->data;
122   VecType        impVecType;
123   PetscInt       n_constraints,n_R,old_size;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   if (!pcbddc->ConstraintMatrix) {
128     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
129   }
130   /* get sizes */
131   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
132   n_R = pcis->n-pcbddc->n_actual_vertices;
133   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
134   /* local work vectors (try to avoid unneeded work)*/
135   /* R nodes */
136   old_size = -1;
137   if (pcbddc->vec1_R) {
138     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
139   }
140   if (n_R != old_size) {
141     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
143     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
144     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
145     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
146     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
147   }
148   /* local primal dofs */
149   old_size = -1;
150   if (pcbddc->vec1_P) {
151     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
152   }
153   if (pcbddc->local_primal_size != old_size) {
154     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
157     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
158   }
159   /* local explicit constraints */
160   old_size = -1;
161   if (pcbddc->vec1_C) {
162     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
163   }
164   if (n_constraints && n_constraints != old_size) {
165     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
168     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "PCBDDCSetUpCorrection"
175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176 {
177   PetscErrorCode         ierr;
178   /* pointers to pcis and pcbddc */
179   PC_IS*                 pcis = (PC_IS*)pc->data;
180   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
181   /* submatrices of local problem */
182   Mat                    A_RV,A_VR,A_VV;
183   /* working matrices */
184   Mat                    M1,M2,M3,C_CR;
185   /* working vectors */
186   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
187   /* additional working stuff */
188   IS                     is_aux;
189   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
190   const PetscScalar      *array,*row_cmat_values;
191   const PetscInt         *row_cmat_indices,*idx_R_local;
192   PetscInt               *idx_V_B,*auxindices;
193   PetscInt               n_vertices,n_constraints,size_of_constraint;
194   PetscInt               i,j,n_R,n_D,n_B;
195   PetscBool              unsymmetric_check;
196   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197   MatType                impMatType;
198   /* some shortcuts to scalars */
199   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
200   /* for debugging purposes */
201   PetscReal              *coarsefunctions_errors,*constraints_errors;
202 
203   PetscFunctionBegin;
204   /* get number of vertices (corners plus constraints with change of basis)
205      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
206   n_vertices = pcbddc->n_actual_vertices;
207   n_constraints = pcbddc->local_primal_size-n_vertices;
208   /* Set Non-overlapping dimensions */
209   n_B = pcis->n_B; n_D = pcis->n - n_B;
210   n_R = pcis->n-n_vertices;
211 
212   /* Set types for local objects needed by BDDC precondtioner */
213   impMatType = MATSEQDENSE;
214 
215   /* Allocating some extra storage just to be safe */
216   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
217   for (i=0;i<pcis->n;i++) auxindices[i]=i;
218 
219   /* vertices in boundary numbering */
220   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
221   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
222   if (i != n_vertices) {
223     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
224   }
225 
226   /* Precompute stuffs needed for preprocessing and application of BDDC*/
227   if (n_constraints) {
228     /* see if we can save some allocations */
229     if (pcbddc->local_auxmat2) {
230       PetscInt on_R,on_constraints;
231       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
232       if (on_R != n_R || on_constraints != n_constraints) {
233         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
234         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
235       }
236     }
237     /* work vectors */
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
240     /* auxiliary matrices */
241     if (!pcbddc->local_auxmat2) {
242       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
243       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
244       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
245       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
246     }
247 
248     /* Extract constraints on R nodes: C_{CR}  */
249     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
250     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
251     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
252 
253     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
254     for (i=0;i<n_constraints;i++) {
255       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
256       /* Get row of constraint matrix in R numbering */
257       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
259       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
260       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
261       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
262       /* Solve for row of constraint matrix in R numbering */
263       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
264       /* Set values in local_auxmat2 */
265       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
267       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
268     }
269     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
272 
273     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
274     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
275     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
276     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
277     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
278     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
279     ierr = MatSetUp(M1);CHKERRQ(ierr);
280     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
281     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
282     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
283     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
284     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
285     ierr = MatDestroy(&M2);CHKERRQ(ierr);
286     ierr = MatDestroy(&M3);CHKERRQ(ierr);
287     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
288     if (!pcbddc->local_auxmat1) {
289       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     } else {
291       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
292     }
293   }
294 
295   /* Get submatrices from subdomain matrix */
296   if (n_vertices) {
297     PetscInt ibs,mbs;
298     PetscBool issbaij;
299     Mat newmat;
300 
301     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
302     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
303     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
304     if (ibs != mbs) { /* need to convert to SEQAIJ */
305       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
309       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
310     } else {
311       /* this is safe */
312       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
313       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
314       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
315         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
316         /* which of the two approaches is faster? */
317         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
319         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
320         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
321         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
322       } else {
323         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
324         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
325       }
326     }
327     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
328     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
329     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
330   }
331 
332   /* Matrix of coarse basis functions (local) */
333   if (pcbddc->coarse_phi_B) {
334     PetscInt on_B,on_primal;
335     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
336     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
337       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
338       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
339     }
340   }
341   if (pcbddc->coarse_phi_D) {
342     PetscInt on_D,on_primal;
343     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
344     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
345       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
346       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
347     }
348   }
349   if (!pcbddc->coarse_phi_B) {
350     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
351     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
352     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
353     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
354   }
355   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
356     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
357     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
358     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
359     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
360   }
361 
362   if (pcbddc->dbg_flag) {
363     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
364     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
365     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
366   }
367   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
368   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
371 
372   /* vertices */
373   for (i=0;i<n_vertices;i++) {
374     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
375     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
376     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
377     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
378     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
379     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
380     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
381     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
384     if (n_constraints) {
385       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
386       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
387       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
388     }
389     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
390     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
391 
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
395     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
399     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
400     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
401     if (pcbddc->switch_static || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
411     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
412     if (n_constraints) {
413       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
414       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
416     }
417 
418     /* check */
419     if (pcbddc->dbg_flag) {
420       /* assemble subdomain vector on local nodes */
421       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
422       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
427       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
428       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
429       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
430       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
432       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
433       if (n_constraints) {
434         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
435         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
437       }
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
441       /* check saddle point solution */
442       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
445       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
446       /* shift by the identity matrix */
447       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
448       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
451     }
452   }
453 
454   /* constraints */
455   for (i=0;i<n_constraints;i++) {
456     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
457     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
458     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
459     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
460     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
461     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
462     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
463     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
464     if (n_vertices) {
465       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
466     }
467     /* Set values in coarse basis function and subdomain part of coarse_mat */
468     /* coarse basis functions */
469     j = i+n_vertices; /* don't touch this! */
470     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
471     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
475     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
476     if (pcbddc->switch_static || pcbddc->dbg_flag) {
477       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
481       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
482     }
483     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
484     if (n_vertices) {
485       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
486       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
487       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
488     }
489     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
490     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
492 
493     if (pcbddc->dbg_flag) {
494       /* assemble subdomain vector on nodes */
495       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
496       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
498       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
499       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
500       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
501       /* assemble subdomain vector of lagrange multipliers */
502       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
503       if (n_vertices) {
504         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
505         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
506         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
507       }
508       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
510       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
511       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
513       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
514       /* check saddle point solution */
515       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
517       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
518       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
519       /* shift by the identity matrix */
520       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
521       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
523       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
524     }
525   }
526   /* call assembling routines for local coarse basis */
527   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   if (pcbddc->switch_static || pcbddc->dbg_flag) {
530     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532   }
533 
534   /* compute other basis functions for non-symmetric problems */
535   ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr);
536   if (!pcbddc->issym) {
537     if (!pcbddc->coarse_psi_B) {
538       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
539       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
540       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
541       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
542     }
543     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
544       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
545       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
546       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
547       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
548     }
549     for (i=0;i<pcbddc->local_primal_size;i++) {
550       if (n_constraints) {
551         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
552         for (j=0;j<n_constraints;j++) {
553           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
554         }
555         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
556         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
557       }
558       if (i<n_vertices) {
559         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
560         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
561         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
562         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
563         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
564         if (n_constraints) {
565           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
566         }
567       } else {
568         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
569       }
570       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
571       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
572       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
574       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
576       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
577       if (i<n_vertices) {
578         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
579       }
580       if (pcbddc->switch_static || pcbddc->dbg_flag) {
581         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
583         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
585         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
586       }
587 
588       if (pcbddc->dbg_flag) {
589         /* assemble subdomain vector on nodes */
590         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
591         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
593         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
594         if (i<n_vertices) {
595           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
596         }
597         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
598         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
599         /* assemble subdomain vector of lagrange multipliers */
600         for (j=0;j<pcbddc->local_primal_size;j++) {
601           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
602         }
603         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
604         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
605         /* check saddle point solution */
606         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
608         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
609         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
610         /* shift by the identity matrix */
611         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
612         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
614         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
615       }
616     }
617     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     if (pcbddc->switch_static || pcbddc->dbg_flag) {
620       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622     }
623     unsymmetric_check = PETSC_TRUE;
624   } else { /* take references to already computed coarse basis */
625     unsymmetric_check = PETSC_FALSE;
626     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
627     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
628     if (pcbddc->coarse_phi_D) {
629       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
630       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
631     }
632   }
633   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
634   /* Checking coarse_sub_mat and coarse basis functios */
635   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
637   if (pcbddc->dbg_flag) {
638     Mat         coarse_sub_mat;
639     Mat         AUXMAT,TM1,TM2,TM3,TM4;
640     Mat         coarse_phi_D,coarse_phi_B;
641     Mat         coarse_psi_D,coarse_psi_B;
642     Mat         A_II,A_BB,A_IB,A_BI;
643     MatType     checkmattype=MATSEQAIJ;
644     PetscReal   real_value;
645 
646     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
647     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
648     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
649     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
650     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
651     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
652     if (unsymmetric_check) {
653       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
654       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
655     }
656     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
657 
658     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
659     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
660     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
661     if (unsymmetric_check) {
662       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
663       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
664       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
665       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
666       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
667       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
668       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
669       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
670       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
671       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
672       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
673       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
674     } else {
675       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
676       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
677       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683     }
684     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
685     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
686     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
687     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
688     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
689     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
690     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
691     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
692     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
693     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
694     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
695     for (i=0;i<pcbddc->local_primal_size;i++) {
696       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
697     }
698     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
699     for (i=0;i<pcbddc->local_primal_size;i++) {
700       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
701     }
702     if (unsymmetric_check) {
703       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
704       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
705         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
706       }
707       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
708       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
709         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
710       }
711     }
712     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
713     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
714     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
715     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
716     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
717     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
718     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
719     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
720     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
721     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
722     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
723     if (unsymmetric_check) {
724       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
725       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
726     }
727     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
728     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
729     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
730     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
731   }
732   /* free memory */
733   if (n_vertices) {
734     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
735     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
737     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
738     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
739   }
740   if (n_constraints) {
741     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
742     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
743     ierr = MatDestroy(&M1);CHKERRQ(ierr);
744     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
745   }
746   ierr = PetscFree(auxindices);CHKERRQ(ierr);
747   /* get back data */
748   *coarse_submat_vals_n = coarse_submat_vals;
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNCT__
753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
755 {
756   PC_IS*            pcis = (PC_IS*)(pc->data);
757   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
758   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
759   PetscBool         issbaij,isseqaij;
760   /* manage repeated solves */
761   MatReuse          reuse;
762   PetscErrorCode    ierr;
763 
764   PetscFunctionBegin;
765   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
766     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
767   }
768   /* get mat flags */
769   reuse = MAT_INITIAL_MATRIX;
770   if (pc->setupcalled) {
771     if (pc->flag == SAME_NONZERO_PATTERN) {
772       reuse = MAT_REUSE_MATRIX;
773     } else {
774       reuse = MAT_INITIAL_MATRIX;
775     }
776   }
777   if (reuse == MAT_INITIAL_MATRIX) {
778     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
779     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
780     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
781     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
782     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
783   }
784 
785   /* transform local matrices if needed */
786   if (pcbddc->ChangeOfBasisMatrix) {
787     Mat         change_mat_all;
788     PetscScalar *row_cmat_values;
789     PetscInt    *row_cmat_indices;
790     PetscInt    *nnz,*is_indices,*temp_indices;
791     PetscInt    i,j,k,n_D,n_B;
792 
793     /* Get Non-overlapping dimensions */
794     n_B = pcis->n_B;
795     n_D = pcis->n-n_B;
796 
797     /* compute nonzero structure of change of basis on all local nodes */
798     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
799     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
800     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
801     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
802     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
803     k=1;
804     for (i=0;i<n_B;i++) {
805       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
806       nnz[is_indices[i]]=j;
807       if (k < j) k = j;
808       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
809     }
810     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
811     /* assemble change of basis matrix on the whole set of local dofs */
812     ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr);
813     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
814     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
815     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
816     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
817     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
818     for (i=0;i<n_D;i++) {
819       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
822     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
823     for (i=0;i<n_B;i++) {
824       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
825       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
826       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
827       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
828     }
829     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
831     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
832     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
833     if (isseqaij) {
834       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
835     } else {
836       Mat work_mat;
837       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
838       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
839       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
840     }
841     /*
842     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
843     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
844     */
845     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
846     ierr = PetscFree(nnz);CHKERRQ(ierr);
847     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
848   } else {
849     /* without change of basis, the local matrix is unchanged */
850     if (!pcbddc->local_mat) {
851       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
852       pcbddc->local_mat = matis->A;
853     }
854   }
855 
856   /* get submatrices */
857   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
858   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
859   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
860   if (!issbaij) {
861     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
862     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
863   } else {
864     Mat newmat;
865     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
866     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
867     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
868     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
869   }
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
876 {
877   PC_IS*         pcis = (PC_IS*)(pc->data);
878   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
879   IS             is_aux1,is_aux2;
880   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
881   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
882   PetscInt       vbs,bs;
883   PetscBT        bitmask;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887   /*
888     No need to setup local scatters if
889       - primal space is unchanged
890         AND
891       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
892         AND
893       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
894   */
895   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
896     PetscFunctionReturn(0);
897   }
898   /* destroy old objects */
899   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
900   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
901   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
902   /* Set Non-overlapping dimensions */
903   n_B = pcis->n_B; n_D = pcis->n - n_B;
904   n_vertices = pcbddc->n_actual_vertices;
905   /* create auxiliary bitmask */
906   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
907   for (i=0;i<n_vertices;i++) {
908     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
909   }
910 
911   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
912   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
913   for (i=0, n_R=0; i<pcis->n; i++) {
914     if (!PetscBTLookup(bitmask,i)) {
915       idx_R_local[n_R] = i;
916       n_R++;
917     }
918   }
919 
920   /* Block code */
921   vbs = 1;
922   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
923   if (bs>1 && !(n_vertices%bs)) {
924     PetscBool is_blocked = PETSC_TRUE;
925     PetscInt  *vary;
926     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
927     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
928     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
929     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
930     for (i=0; i<n_vertices; i++) {
931       if (vary[i]!=0 && vary[i]!=bs) {
932         is_blocked = PETSC_FALSE;
933         break;
934       }
935     }
936     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
937       vbs = bs;
938       for (i=0;i<n_R/vbs;i++) {
939         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
940       }
941     }
942     ierr = PetscFree(vary);CHKERRQ(ierr);
943   }
944   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
945   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
946 
947   /* print some info if requested */
948   if (pcbddc->dbg_flag) {
949     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
950     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
951     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
952     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
953     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
954     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
955     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
956     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
957   }
958 
959   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
960   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
961   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
962   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
963   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
964   for (i=0; i<n_D; i++) {
965     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
966   }
967   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   for (i=0, j=0; i<n_R; i++) {
969     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
970       aux_array1[j++] = i;
971     }
972   }
973   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
974   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
975   for (i=0, j=0; i<n_B; i++) {
976     if (!PetscBTLookup(bitmask,is_indices[i])) {
977       aux_array2[j++] = i;
978     }
979   }
980   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
981   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
982   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
983   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
984   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
985 
986   if (pcbddc->switch_static || pcbddc->dbg_flag) {
987     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
988     for (i=0, j=0; i<n_R; i++) {
989       if (PetscBTLookup(bitmask,idx_R_local[i])) {
990         aux_array1[j++] = i;
991       }
992     }
993     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
994     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
995     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
996   }
997   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
998   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1006 {
1007   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1008   PC_IS          *pcis = (PC_IS*)pc->data;
1009   PC             pc_temp;
1010   Mat            A_RR;
1011   MatReuse       reuse;
1012   PetscScalar    m_one = -1.0;
1013   PetscReal      value;
1014   PetscInt       n_D,n_R,ibs,mbs;
1015   PetscBool      use_exact,use_exact_reduced,issbaij;
1016   PetscErrorCode ierr;
1017   /* prefixes stuff */
1018   char           dir_prefix[256],neu_prefix[256],str_level[16];
1019   size_t         len;
1020 
1021   PetscFunctionBegin;
1022 
1023   /* compute prefixes */
1024   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1025   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1026   if (!pcbddc->current_level) {
1027     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1028     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1029     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1030     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1031   } else {
1032     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1033     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1034     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1035     len -= 15; /* remove "pc_bddc_coarse_" */
1036     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1037     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1038     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1039     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1040     *(dir_prefix+len)='\0';
1041     *(neu_prefix+len)='\0';
1042     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1043     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1044     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1045     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1046   }
1047 
1048   /* DIRICHLET PROBLEM */
1049   /* Matrix for Dirichlet problem is pcis->A_II */
1050   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1051   if (!pcbddc->ksp_D) { /* create object if not yet build */
1052     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1053     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1054     /* default */
1055     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1056     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1057     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1058     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1059     if (issbaij) {
1060       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1061     } else {
1062       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1063     }
1064     /* Allow user's customization */
1065     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1066     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1067   }
1068   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1069   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1070   if (!n_D) {
1071     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1072     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1073   }
1074   /* Set Up KSP for Dirichlet problem of BDDC */
1075   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1076   /* set ksp_D into pcis data */
1077   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1078   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1079   pcis->ksp_D = pcbddc->ksp_D;
1080 
1081   /* NEUMANN PROBLEM */
1082   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1083   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1084   if (pcbddc->ksp_R) { /* already created ksp */
1085     PetscInt nn_R;
1086     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1087     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1088     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1089     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1090       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1091       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1092       reuse = MAT_INITIAL_MATRIX;
1093     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1094       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1095         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1096         reuse = MAT_INITIAL_MATRIX;
1097       } else { /* safe to reuse the matrix */
1098         reuse = MAT_REUSE_MATRIX;
1099       }
1100     }
1101     /* last check */
1102     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1103       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1104       reuse = MAT_INITIAL_MATRIX;
1105     }
1106   } else { /* first time, so we need to create the matrix */
1107     reuse = MAT_INITIAL_MATRIX;
1108   }
1109   /* extract A_RR */
1110   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1111   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1112   if (ibs != mbs) {
1113     Mat newmat;
1114     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1115     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1116     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1117   } else {
1118     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1119   }
1120   if (!pcbddc->ksp_R) { /* create object if not present */
1121     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1122     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1123     /* default */
1124     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1125     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1126     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1127     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1128     if (issbaij) {
1129       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1130     } else {
1131       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1132     }
1133     /* Allow user's customization */
1134     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1135     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1136   }
1137   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1138   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1139   if (!n_R) {
1140     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1141     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1142   }
1143   /* Set Up KSP for Neumann problem of BDDC */
1144   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1145 
1146   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1147   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1148     /* Dirichlet */
1149     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1150     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1151     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1152     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1153     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1154     /* need to be adapted? */
1155     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1156     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1157     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1158     /* print info */
1159     if (pcbddc->dbg_flag) {
1160       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1161       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1162       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1163       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1164       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1165       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1166     }
1167     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1168       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1169     }
1170 
1171     /* Neumann */
1172     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1173     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1174     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1175     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1176     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1177     /* need to be adapted? */
1178     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1179     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1180     /* print info */
1181     if (pcbddc->dbg_flag) {
1182       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1183       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1184     }
1185     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1186       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1187     }
1188   }
1189   /* free Neumann problem's matrix */
1190   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1196 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1197 {
1198   PetscErrorCode ierr;
1199   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1200 
1201   PetscFunctionBegin;
1202   if (applytranspose) {
1203     if (pcbddc->local_auxmat1) {
1204       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1205       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1206     }
1207     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1208   } else {
1209     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1210     if (pcbddc->local_auxmat1) {
1211       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1212       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1213     }
1214   }
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1219 #undef __FUNCT__
1220 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1221 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1222 {
1223   PetscErrorCode ierr;
1224   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1225   PC_IS*            pcis = (PC_IS*)  (pc->data);
1226   const PetscScalar zero = 0.0;
1227 
1228   PetscFunctionBegin;
1229   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1230   if (applytranspose) {
1231     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1232     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1233   } else {
1234     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1235     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1236   }
1237   /* start communications from local primal nodes to rhs of coarse solver */
1238   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1239   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1241 
1242   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1243   /* TODO remove null space when doing multilevel */
1244   if (pcbddc->coarse_ksp) {
1245     if (applytranspose) {
1246       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1247     } else {
1248       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1249     }
1250   }
1251   /* start communications from coarse solver solution to local primal nodes */
1252   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1253 
1254   /* Local solution on R nodes */
1255   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1256   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1257   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258   if (pcbddc->switch_static) {
1259     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1260     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1261   }
1262   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1263   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1264   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266   if (pcbddc->switch_static) {
1267     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   }
1270 
1271   /* complete communications from coarse sol to local primal nodes */
1272   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1273 
1274   /* Sum contributions from two levels */
1275   if (applytranspose) {
1276     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1277     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1278   } else {
1279     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1280     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1286 #undef __FUNCT__
1287 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1288 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1289 {
1290   PetscErrorCode ierr;
1291   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1292   PetscScalar    *array,*array2;
1293   Vec            from,to;
1294 
1295   PetscFunctionBegin;
1296   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1297     from = pcbddc->coarse_vec;
1298     to = pcbddc->vec1_P;
1299     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1300       Vec tvec;
1301       PetscInt lsize;
1302       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1303       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1304       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1305       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1306       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1307       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1308       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1309     }
1310   } else { /* from local to global -> put data in coarse right hand side */
1311     from = pcbddc->vec1_P;
1312     to = pcbddc->coarse_vec;
1313   }
1314   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1320 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1321 {
1322   PetscErrorCode ierr;
1323   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1324   PetscScalar    *array,*array2;
1325   Vec            from,to;
1326 
1327   PetscFunctionBegin;
1328   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1329     from = pcbddc->coarse_vec;
1330     to = pcbddc->vec1_P;
1331   } else { /* from local to global -> put data in coarse right hand side */
1332     from = pcbddc->vec1_P;
1333     to = pcbddc->coarse_vec;
1334   }
1335   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1336   if (smode == SCATTER_FORWARD) {
1337     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1338       Vec tvec;
1339       PetscInt lsize;
1340       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1341       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1342       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1343       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1344       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1345       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1346       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1347     }
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 /* uncomment for testing purposes */
1353 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1354 #undef __FUNCT__
1355 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1356 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1357 {
1358   PetscErrorCode    ierr;
1359   PC_IS*            pcis = (PC_IS*)(pc->data);
1360   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1361   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1362   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1363   MatType           impMatType=MATSEQAIJ;
1364   /* one and zero */
1365   PetscScalar       one=1.0,zero=0.0;
1366   /* space to store constraints and their local indices */
1367   PetscScalar       *temp_quadrature_constraint;
1368   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1369   /* iterators */
1370   PetscInt          i,j,k,total_counts,temp_start_ptr;
1371   /* stuff to store connected components stored in pcbddc->mat_graph */
1372   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1373   PetscInt          n_ISForFaces,n_ISForEdges;
1374   /* near null space stuff */
1375   MatNullSpace      nearnullsp;
1376   const Vec         *nearnullvecs;
1377   Vec               *localnearnullsp;
1378   PetscBool         nnsp_has_cnst;
1379   PetscInt          nnsp_size;
1380   PetscScalar       *array;
1381   /* BLAS integers */
1382   PetscBLASInt      lwork,lierr;
1383   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1384   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1385   /* LAPACK working arrays for SVD or POD */
1386   PetscBool         skip_lapack;
1387   PetscScalar       *work;
1388   PetscReal         *singular_vals;
1389 #if defined(PETSC_USE_COMPLEX)
1390   PetscReal         *rwork;
1391 #endif
1392 #if defined(PETSC_MISSING_LAPACK_GESVD)
1393   PetscBLASInt      Blas_one_2=1;
1394   PetscScalar       *temp_basis,*correlation_mat;
1395 #else
1396   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1397   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1398 #endif
1399   /* reuse */
1400   PetscInt          olocal_primal_size;
1401   PetscInt          *oprimal_indices_local_idxs;
1402   /* change of basis */
1403   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1404   PetscBool         boolforchange,qr_needed;
1405   PetscBT           touched,change_basis,qr_needed_idx;
1406   /* auxiliary stuff */
1407   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1408   /* some quantities */
1409   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1410   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1411 
1412 
1413   PetscFunctionBegin;
1414   /* Destroy Mat objects computed previously */
1415   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1416   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1417   /* Get index sets for faces, edges and vertices from graph */
1418   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1419     pcbddc->use_vertices = PETSC_TRUE;
1420   }
1421   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1422   /* HACKS (the two following code branches) */
1423   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1424     pcbddc->use_change_of_basis = PETSC_TRUE;
1425     pcbddc->use_change_on_faces = PETSC_FALSE;
1426   }
1427   if (pcbddc->NullSpace) {
1428     /* use_change_of_basis should be consistent among processors */
1429     PetscBool tbool = pcbddc->use_change_of_basis;
1430     ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1431   }
1432   /* print some info */
1433   if (pcbddc->dbg_flag) {
1434     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1435     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1436     i = 0;
1437     if (ISForVertices) {
1438       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1439     }
1440     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1441     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1442     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1443     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1444   }
1445   /* check if near null space is attached to global mat */
1446   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1447   if (nearnullsp) {
1448     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1449     /* remove any stored info */
1450     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1451     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1452     /* store information for BDDC solver reuse */
1453     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1454     pcbddc->onearnullspace = nearnullsp;
1455     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1456     for (i=0;i<nnsp_size;i++) {
1457       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1458     }
1459   } else { /* if near null space is not provided BDDC uses constants by default */
1460     nnsp_size = 0;
1461     nnsp_has_cnst = PETSC_TRUE;
1462   }
1463   /* get max number of constraints on a single cc */
1464   max_constraints = nnsp_size;
1465   if (nnsp_has_cnst) max_constraints++;
1466 
1467   /*
1468        Evaluate maximum storage size needed by the procedure
1469        - temp_indices will contain start index of each constraint stored as follows
1470        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1471        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1472        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1473                                                                                                                                                          */
1474   total_counts = n_ISForFaces+n_ISForEdges;
1475   total_counts *= max_constraints;
1476   n_vertices = 0;
1477   if (ISForVertices) {
1478     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1479   }
1480   total_counts += n_vertices;
1481   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1482   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1483   total_counts = 0;
1484   max_size_of_constraint = 0;
1485   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1486     if (i<n_ISForEdges) {
1487       used_IS = &ISForEdges[i];
1488     } else {
1489       used_IS = &ISForFaces[i-n_ISForEdges];
1490     }
1491     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1492     total_counts += j;
1493     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1494   }
1495   total_counts *= max_constraints;
1496   total_counts += n_vertices;
1497   ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr);
1498   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr);
1499   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1500   /* get local part of global near null space vectors */
1501   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1502   for (k=0;k<nnsp_size;k++) {
1503     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1504     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506   }
1507 
1508   /* whether or not to skip lapack calls */
1509   skip_lapack = PETSC_TRUE;
1510   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1511 
1512   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1513   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1514     PetscScalar temp_work;
1515 #if defined(PETSC_MISSING_LAPACK_GESVD)
1516     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1517     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1518     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1519     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1520 #if defined(PETSC_USE_COMPLEX)
1521     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1522 #endif
1523     /* now we evaluate the optimal workspace using query with lwork=-1 */
1524     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1525     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1526     lwork = -1;
1527     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1528 #if !defined(PETSC_USE_COMPLEX)
1529     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1530 #else
1531     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1532 #endif
1533     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1534     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1535 #else /* on missing GESVD */
1536     /* SVD */
1537     PetscInt max_n,min_n;
1538     max_n = max_size_of_constraint;
1539     min_n = max_constraints;
1540     if (max_size_of_constraint < max_constraints) {
1541       min_n = max_size_of_constraint;
1542       max_n = max_constraints;
1543     }
1544     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1545 #if defined(PETSC_USE_COMPLEX)
1546     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1547 #endif
1548     /* now we evaluate the optimal workspace using query with lwork=-1 */
1549     lwork = -1;
1550     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1551     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1552     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1553     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1554 #if !defined(PETSC_USE_COMPLEX)
1555     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1556 #else
1557     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1558 #endif
1559     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1560     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1561 #endif /* on missing GESVD */
1562     /* Allocate optimal workspace */
1563     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1564     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1565   }
1566   /* Now we can loop on constraining sets */
1567   total_counts = 0;
1568   temp_indices[0] = 0;
1569   /* vertices */
1570   if (ISForVertices) {
1571     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1572     if (nnsp_has_cnst) { /* consider all vertices */
1573       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1574       for (i=0;i<n_vertices;i++) {
1575         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1576         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1577         total_counts++;
1578       }
1579     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1580       PetscBool used_vertex;
1581       for (i=0;i<n_vertices;i++) {
1582         used_vertex = PETSC_FALSE;
1583         k = 0;
1584         while (!used_vertex && k<nnsp_size) {
1585           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1586           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1587             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1588             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1589             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1590             total_counts++;
1591             used_vertex = PETSC_TRUE;
1592           }
1593           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1594           k++;
1595         }
1596       }
1597     }
1598     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1599     n_vertices = total_counts;
1600   }
1601 
1602   /* edges and faces */
1603   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1604     if (i<n_ISForEdges) {
1605       used_IS = &ISForEdges[i];
1606       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1607     } else {
1608       used_IS = &ISForFaces[i-n_ISForEdges];
1609       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1610     }
1611     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1612     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1613     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1614     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1615     /* change of basis should not be performed on local periodic nodes */
1616     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1617     if (nnsp_has_cnst) {
1618       PetscScalar quad_value;
1619       temp_constraints++;
1620       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1621       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1622       for (j=0;j<size_of_constraint;j++) {
1623         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1624       }
1625       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1626       total_counts++;
1627     }
1628     for (k=0;k<nnsp_size;k++) {
1629       PetscReal real_value;
1630       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1631       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1632       for (j=0;j<size_of_constraint;j++) {
1633         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1634       }
1635       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1636       /* check if array is null on the connected component */
1637       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1638       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1639       if (real_value > 0.0) { /* keep indices and values */
1640         temp_constraints++;
1641         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1642         total_counts++;
1643       }
1644     }
1645     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1646     valid_constraints = temp_constraints;
1647     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1648     if (!pcbddc->use_nnsp_true && temp_constraints) {
1649       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1650 
1651 #if defined(PETSC_MISSING_LAPACK_GESVD)
1652       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1653          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1654          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1655             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1656             from that computed using LAPACKgesvd
1657          -> This is due to a different computation of eigenvectors in LAPACKheev
1658          -> The quality of the POD-computed basis will be the same */
1659       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1660       /* Store upper triangular part of correlation matrix */
1661       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1662       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1663       for (j=0;j<temp_constraints;j++) {
1664         for (k=0;k<j+1;k++) {
1665           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1666         }
1667       }
1668       /* compute eigenvalues and eigenvectors of correlation matrix */
1669       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1670       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1671 #if !defined(PETSC_USE_COMPLEX)
1672       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1673 #else
1674       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1675 #endif
1676       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1677       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1678       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1679       j=0;
1680       while (j < temp_constraints && singular_vals[j] < tol) j++;
1681       total_counts=total_counts-j;
1682       valid_constraints = temp_constraints-j;
1683       /* scale and copy POD basis into used quadrature memory */
1684       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1685       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1686       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1687       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1688       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1689       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1690       if (j<temp_constraints) {
1691         PetscInt ii;
1692         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1693         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1694         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1695         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1696         for (k=0;k<temp_constraints-j;k++) {
1697           for (ii=0;ii<size_of_constraint;ii++) {
1698             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1699           }
1700         }
1701       }
1702 #else  /* on missing GESVD */
1703       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1704       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1705       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1706       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1707 #if !defined(PETSC_USE_COMPLEX)
1708       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1709 #else
1710       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1711 #endif
1712       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1713       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1714       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1715       k = temp_constraints;
1716       if (k > size_of_constraint) k = size_of_constraint;
1717       j = 0;
1718       while (j < k && singular_vals[k-j-1] < tol) j++;
1719       total_counts = total_counts-temp_constraints+k-j;
1720       valid_constraints = k-j;
1721 #endif /* on missing GESVD */
1722     }
1723     /* setting change_of_basis flag is safe now */
1724     if (boolforchange) {
1725       for (j=0;j<valid_constraints;j++) {
1726         PetscBTSet(change_basis,total_counts-j-1);
1727       }
1728     }
1729   }
1730   /* free index sets of faces, edges and vertices */
1731   for (i=0;i<n_ISForFaces;i++) {
1732     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1733   }
1734   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1735   for (i=0;i<n_ISForEdges;i++) {
1736     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1737   }
1738   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1739   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1740   /* map temp_indices_to_constraint in boundary numbering */
1741   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1742   if (i != temp_indices[total_counts]) {
1743     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1744   }
1745 
1746   /* free workspace */
1747   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1748     ierr = PetscFree(work);CHKERRQ(ierr);
1749 #if defined(PETSC_USE_COMPLEX)
1750     ierr = PetscFree(rwork);CHKERRQ(ierr);
1751 #endif
1752     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1753 #if defined(PETSC_MISSING_LAPACK_GESVD)
1754     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1755     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1756 #endif
1757   }
1758   for (k=0;k<nnsp_size;k++) {
1759     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1760   }
1761   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1762 
1763   /* set quantities in pcbddc data structure and store previous primal size */
1764   /* n_vertices defines the number of subdomain corners in the primal space */
1765   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1766   olocal_primal_size = pcbddc->local_primal_size;
1767   pcbddc->local_primal_size = total_counts;
1768   pcbddc->n_vertices = n_vertices;
1769   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1770 
1771   /* Create constraint matrix */
1772   /* The constraint matrix is used to compute the l2g map of primal dofs */
1773   /* so we need to set it up properly either with or without change of basis */
1774   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1775   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1776   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1777   /* array to compute a local numbering of constraints : vertices first then constraints */
1778   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1779   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1780   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1781   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1782   /* auxiliary stuff for basis change */
1783   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1784   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1785 
1786   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1787   total_primal_vertices=0;
1788   for (i=0;i<pcbddc->local_primal_size;i++) {
1789     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1790     if (size_of_constraint == 1) {
1791       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1792       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1793       aux_primal_minloc[total_primal_vertices]=0;
1794       total_primal_vertices++;
1795     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1796       PetscInt min_loc,min_index;
1797       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1798       /* find first untouched local node */
1799       k = 0;
1800       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1801       min_index = global_indices[k];
1802       min_loc = k;
1803       /* search the minimum among global nodes already untouched on the cc */
1804       for (k=1;k<size_of_constraint;k++) {
1805         /* there can be more than one constraint on a single connected component */
1806         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1807           min_index = global_indices[k];
1808           min_loc = k;
1809         }
1810       }
1811       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1812       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1813       aux_primal_minloc[total_primal_vertices]=min_loc;
1814       total_primal_vertices++;
1815     }
1816   }
1817   /* determine if a QR strategy is needed for change of basis */
1818   qr_needed = PETSC_FALSE;
1819   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1820   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1821     if (PetscBTLookup(change_basis,i)) {
1822       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1823       j = 0;
1824       for (k=0;k<size_of_constraint;k++) {
1825         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1826           j++;
1827         }
1828       }
1829       /* found more than one primal dof on the cc */
1830       if (j > 1) {
1831         PetscBTSet(qr_needed_idx,i);
1832         qr_needed = PETSC_TRUE;
1833       }
1834     }
1835   }
1836   /* free workspace */
1837   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1838 
1839   /* permute indices in order to have a sorted set of vertices */
1840   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1841 
1842   /* nonzero structure of constraint matrix */
1843   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1844   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1845   j=total_primal_vertices;
1846   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1847     if (!PetscBTLookup(change_basis,i)) {
1848       nnz[j]=temp_indices[i+1]-temp_indices[i];
1849       j++;
1850     }
1851   }
1852   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1853   ierr = PetscFree(nnz);CHKERRQ(ierr);
1854   /* set values in constraint matrix */
1855   for (i=0;i<total_primal_vertices;i++) {
1856     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1857   }
1858   total_counts = total_primal_vertices;
1859   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1860     if (!PetscBTLookup(change_basis,i)) {
1861       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1862       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1863       total_counts++;
1864     }
1865   }
1866   /* assembling */
1867   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1868   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1869   /*
1870   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1871   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1872   */
1873   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1874   if (pcbddc->use_change_of_basis) {
1875     /* dual and primal dofs on a single cc */
1876     PetscInt     dual_dofs,primal_dofs;
1877     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1878     PetscInt     primal_counter;
1879     /* working stuff for GEQRF */
1880     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1881     PetscBLASInt lqr_work;
1882     /* working stuff for UNGQR */
1883     PetscScalar  *gqr_work,lgqr_work_t;
1884     PetscBLASInt lgqr_work;
1885     /* working stuff for TRTRS */
1886     PetscScalar  *trs_rhs;
1887     PetscBLASInt Blas_NRHS;
1888     /* pointers for values insertion into change of basis matrix */
1889     PetscInt     *start_rows,*start_cols;
1890     PetscScalar  *start_vals;
1891     /* working stuff for values insertion */
1892     PetscBT      is_primal;
1893 
1894     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1895     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1896     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1897     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1898     /* work arrays */
1899     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1900     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1901     /* nonzeros per row */
1902     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1903       if (PetscBTLookup(change_basis,i)) {
1904         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1905         if (PetscBTLookup(qr_needed_idx,i)) {
1906           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1907         } else {
1908           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1909           /* get local primal index on the cc */
1910           j = 0;
1911           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1912           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1913         }
1914       }
1915     }
1916     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1917     ierr = PetscFree(nnz);CHKERRQ(ierr);
1918     /* Set initial identity in the matrix */
1919     for (i=0;i<pcis->n_B;i++) {
1920       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1921     }
1922 
1923     if (pcbddc->dbg_flag) {
1924       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1925       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1926     }
1927 
1928 
1929     /* Now we loop on the constraints which need a change of basis */
1930     /*
1931        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1932        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1933 
1934        Basic blocks of change of basis matrix T computed
1935 
1936           - Using the following block transformation if there is only a primal dof on the cc
1937             (in the example, primal dof is the last one of the edge in LOCAL ordering
1938              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1939             | 1        0   ...        0     1 |
1940             | 0        1   ...        0     1 |
1941             |              ...                |
1942             | 0        ...            1     1 |
1943             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1944 
1945           - via QR decomposition of constraints otherwise
1946     */
1947     if (qr_needed) {
1948       /* space to store Q */
1949       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
1950       /* first we issue queries for optimal work */
1951       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1952       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1953       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1954       lqr_work = -1;
1955       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1956       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1957       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1958       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
1959       lgqr_work = -1;
1960       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1961       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1962       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1963       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1964       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1965       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1966       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1967       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1968       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
1969       /* array to store scaling factors for reflectors */
1970       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
1971       /* array to store rhs and solution of triangular solver */
1972       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
1973       /* allocating workspace for check */
1974       if (pcbddc->dbg_flag) {
1975         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
1976       }
1977     }
1978     /* array to store whether a node is primal or not */
1979     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1980     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
1981     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1982     if (i != total_primal_vertices) {
1983       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1984     }
1985     for (i=0;i<total_primal_vertices;i++) {
1986       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1987     }
1988     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1989 
1990     /* loop on constraints and see whether or not they need a change of basis and compute it */
1991     /* -> using implicit ordering contained in temp_indices data */
1992     total_counts = pcbddc->n_vertices;
1993     primal_counter = total_counts;
1994     while (total_counts<pcbddc->local_primal_size) {
1995       primal_dofs = 1;
1996       if (PetscBTLookup(change_basis,total_counts)) {
1997         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1998         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1999           primal_dofs++;
2000         }
2001         /* get constraint info */
2002         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2003         dual_dofs = size_of_constraint-primal_dofs;
2004 
2005         if (pcbddc->dbg_flag) {
2006           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2007         }
2008 
2009         if (primal_dofs > 1) { /* QR */
2010 
2011           /* copy quadrature constraints for change of basis check */
2012           if (pcbddc->dbg_flag) {
2013             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2014           }
2015           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2016           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2017 
2018           /* compute QR decomposition of constraints */
2019           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2020           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2021           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2022           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2023           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2024           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2025           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2026 
2027           /* explictly compute R^-T */
2028           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2029           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2030           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2031           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2032           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2033           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2034           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2035           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2036           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2037           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2038 
2039           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2040           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2041           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2042           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2043           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2044           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2045           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2046           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2047           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2048 
2049           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2050              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2051              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2052           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2053           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2054           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2055           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2056           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2057           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2058           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2059           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2060           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2061           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2062 
2063           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2064           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2065           /* insert cols for primal dofs */
2066           for (j=0;j<primal_dofs;j++) {
2067             start_vals = &qr_basis[j*size_of_constraint];
2068             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2069             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2070           }
2071           /* insert cols for dual dofs */
2072           for (j=0,k=0;j<dual_dofs;k++) {
2073             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2074               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2075               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2076               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2077               j++;
2078             }
2079           }
2080 
2081           /* check change of basis */
2082           if (pcbddc->dbg_flag) {
2083             PetscInt   ii,jj;
2084             PetscBool valid_qr=PETSC_TRUE;
2085             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2086             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2087             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2088             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2089             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2090             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2091             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2092             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2093             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2094             for (jj=0;jj<size_of_constraint;jj++) {
2095               for (ii=0;ii<primal_dofs;ii++) {
2096                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2097                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2098               }
2099             }
2100             if (!valid_qr) {
2101               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2102               for (jj=0;jj<size_of_constraint;jj++) {
2103                 for (ii=0;ii<primal_dofs;ii++) {
2104                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2105                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2106                   }
2107                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2108                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2109                   }
2110                 }
2111               }
2112             } else {
2113               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2114             }
2115           }
2116         } else { /* simple transformation block */
2117           PetscInt row,col;
2118           PetscScalar val;
2119           for (j=0;j<size_of_constraint;j++) {
2120             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2121             if (!PetscBTLookup(is_primal,row)) {
2122               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2123               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2124               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2125             } else {
2126               for (k=0;k<size_of_constraint;k++) {
2127                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2128                 if (row != col) {
2129                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2130                 } else {
2131                   val = 1.0;
2132                 }
2133                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2134               }
2135             }
2136           }
2137           if (pcbddc->dbg_flag) {
2138             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2139           }
2140         }
2141         /* increment primal counter */
2142         primal_counter += primal_dofs;
2143       } else {
2144         if (pcbddc->dbg_flag) {
2145           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2146         }
2147       }
2148       /* increment constraint counter total_counts */
2149       total_counts += primal_dofs;
2150     }
2151 
2152     /* free workspace */
2153     if (qr_needed) {
2154       if (pcbddc->dbg_flag) {
2155         ierr = PetscFree(work);CHKERRQ(ierr);
2156       }
2157       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2158       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2159       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2160       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2161       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2162     }
2163     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2164     /* assembling */
2165     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2166     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167     /*
2168     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2169     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2170     */
2171   }
2172   /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */
2173   if (pcbddc->user_ChangeOfBasisMatrix) {
2174     PetscInt rows,cols;
2175     ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr);
2176     if (rows == pcis->n && cols == pcis->n) {
2177       ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2178     } else {
2179       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2180       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2181     }
2182   }
2183 
2184   /* get indices in local ordering for vertices and constraints */
2185   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2186     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2187     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2188   }
2189   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2190   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2191   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2192   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2193   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2194   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2195   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2196   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2197   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2198   /* set quantities in PCBDDC data struct */
2199   pcbddc->n_actual_vertices = i;
2200   /* check if a new primal space has been introduced */
2201   pcbddc->new_primal_space_local = PETSC_TRUE;
2202   if (olocal_primal_size == pcbddc->local_primal_size) {
2203     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2204     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2205     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2206   }
2207   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2208   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2209 
2210   /* flush dbg viewer */
2211   if (pcbddc->dbg_flag) {
2212     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2213   }
2214 
2215   /* free workspace */
2216   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2217   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2218   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2219   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2220   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2221   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2222   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2223   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2229 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2230 {
2231   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2232   PC_IS       *pcis = (PC_IS*)pc->data;
2233   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2234   PetscInt    ierr,i,vertex_size;
2235   PetscViewer viewer=pcbddc->dbg_viewer;
2236 
2237   PetscFunctionBegin;
2238   /* Reset previously computed graph */
2239   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2240   /* Init local Graph struct */
2241   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2242 
2243   /* Check validity of the csr graph passed in by the user */
2244   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2245     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2246   }
2247 
2248   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2249   if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2250     Mat mat_adj;
2251     const PetscInt *xadj,*adjncy;
2252     PetscBool flg_row=PETSC_TRUE;
2253 
2254     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2255     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2256     if (!flg_row) {
2257       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2258     }
2259     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2260     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2261     if (!flg_row) {
2262       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2263     }
2264     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2265   }
2266 
2267   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2268   vertex_size = 1;
2269   if (pcbddc->user_provided_isfordofs) {
2270     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2271       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2272       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2273         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2274         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2275       }
2276       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2277       pcbddc->n_ISForDofs = 0;
2278       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2279     }
2280     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2281     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2282   } else {
2283     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2284       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2285       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2286       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2287         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2288       }
2289     }
2290   }
2291 
2292   /* Setup of Graph */
2293   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2294     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2295   }
2296   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2297     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2298   }
2299   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2300 
2301   /* Graph's connected components analysis */
2302   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2303 
2304   /* print some info to stdout */
2305   if (pcbddc->dbg_flag) {
2306     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2307   }
2308 
2309   /* mark topography has done */
2310   pcbddc->recompute_topography = PETSC_FALSE;
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2316 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2317 {
2318   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2319   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2320   PetscErrorCode ierr;
2321 
2322   PetscFunctionBegin;
2323   n = 0;
2324   vertices = 0;
2325   if (pcbddc->ConstraintMatrix) {
2326     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2327     for (i=0;i<local_primal_size;i++) {
2328       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2329       if (size_of_constraint == 1) n++;
2330       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2331     }
2332     if (vertices_idx) {
2333       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2334       n = 0;
2335       for (i=0;i<local_primal_size;i++) {
2336         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2337         if (size_of_constraint == 1) {
2338           vertices[n++]=row_cmat_indices[0];
2339         }
2340         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2341       }
2342     }
2343   }
2344   *n_vertices = n;
2345   if (vertices_idx) *vertices_idx = vertices;
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2351 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2352 {
2353   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2354   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2355   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2356   PetscBT        touched;
2357   PetscErrorCode ierr;
2358 
2359     /* This function assumes that the number of local constraints per connected component
2360        is not greater than the number of nodes defined for the connected component
2361        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2362   PetscFunctionBegin;
2363   n = 0;
2364   constraints_index = 0;
2365   if (pcbddc->ConstraintMatrix) {
2366     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2367     max_size_of_constraint = 0;
2368     for (i=0;i<local_primal_size;i++) {
2369       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2370       if (size_of_constraint > 1) {
2371         n++;
2372       }
2373       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2374       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2375     }
2376     if (constraints_idx) {
2377       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2378       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2379       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2380       n = 0;
2381       for (i=0;i<local_primal_size;i++) {
2382         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2383         if (size_of_constraint > 1) {
2384           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2385           /* find first untouched local node */
2386           j = 0;
2387           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2388           min_index = row_cmat_global_indices[j];
2389           min_loc = j;
2390           /* search the minimum among nodes not yet touched on the connected component
2391              since there can be more than one constraint on a single cc */
2392           for (j=1;j<size_of_constraint;j++) {
2393             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2394               min_index = row_cmat_global_indices[j];
2395               min_loc = j;
2396             }
2397           }
2398           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2399           constraints_index[n++] = row_cmat_indices[min_loc];
2400         }
2401         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2402       }
2403       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2404       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2405     }
2406   }
2407   *n_constraints = n;
2408   if (constraints_idx) *constraints_idx = constraints_index;
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 /* the next two functions has been adapted from pcis.c */
2413 #undef __FUNCT__
2414 #define __FUNCT__ "PCBDDCApplySchur"
2415 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2416 {
2417   PetscErrorCode ierr;
2418   PC_IS          *pcis = (PC_IS*)(pc->data);
2419 
2420   PetscFunctionBegin;
2421   if (!vec2_B) { vec2_B = v; }
2422   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2423   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2424   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2425   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2426   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2432 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2433 {
2434   PetscErrorCode ierr;
2435   PC_IS          *pcis = (PC_IS*)(pc->data);
2436 
2437   PetscFunctionBegin;
2438   if (!vec2_B) { vec2_B = v; }
2439   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2440   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2441   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2442   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2443   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "PCBDDCSubsetNumbering"
2449 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2450 {
2451   Vec            local_vec,global_vec;
2452   IS             seqis,paris;
2453   VecScatter     scatter_ctx;
2454   PetscScalar    *array;
2455   PetscInt       *temp_global_dofs;
2456   PetscScalar    globalsum;
2457   PetscInt       i,j,s;
2458   PetscInt       nlocals,first_index,old_index,max_local;
2459   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2460   PetscMPIInt    *dof_sizes,*dof_displs;
2461   PetscBool      first_found;
2462   PetscErrorCode ierr;
2463 
2464   PetscFunctionBegin;
2465   /* mpi buffers */
2466   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2467   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2468   j = ( !rank_prec_comm ? size_prec_comm : 0);
2469   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2470   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2471   /* get maximum size of subset */
2472   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2473   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2474   max_local = 0;
2475   for (i=0;i<n_local_dofs;i++) {
2476     if (max_local < temp_global_dofs[i] ) {
2477       max_local = temp_global_dofs[i];
2478     }
2479   }
2480   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2481   max_global++;
2482   max_local = 0;
2483   for (i=0;i<n_local_dofs;i++) {
2484     if (max_local < local_dofs[i] ) {
2485       max_local = local_dofs[i];
2486     }
2487   }
2488   max_local++;
2489   /* allocate workspace */
2490   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2491   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2492   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2493   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2494   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2495   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2496   /* create scatter */
2497   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2498   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2499   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2500   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2501   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2502   /* init array */
2503   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2504   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2505   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2506   if (local_dofs_mult) {
2507     for (i=0;i<n_local_dofs;i++) {
2508       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2509     }
2510   } else {
2511     for (i=0;i<n_local_dofs;i++) {
2512       array[local_dofs[i]]=1.0;
2513     }
2514   }
2515   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2516   /* scatter into global vec and get total number of global dofs */
2517   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2518   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2519   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2520   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2521   /* Fill global_vec with cumulative function for global numbering */
2522   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2523   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2524   nlocals = 0;
2525   first_index = -1;
2526   first_found = PETSC_FALSE;
2527   for (i=0;i<s;i++) {
2528     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2529       first_found = PETSC_TRUE;
2530       first_index = i;
2531     }
2532     nlocals += (PetscInt)PetscRealPart(array[i]);
2533   }
2534   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2535   if (!rank_prec_comm) {
2536     dof_displs[0]=0;
2537     for (i=1;i<size_prec_comm;i++) {
2538       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2539     }
2540   }
2541   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2542   if (first_found) {
2543     array[first_index] += (PetscScalar)nlocals;
2544     old_index = first_index;
2545     for (i=first_index+1;i<s;i++) {
2546       if (PetscRealPart(array[i]) > 0.1) {
2547         array[i] += array[old_index];
2548         old_index = i;
2549       }
2550     }
2551   }
2552   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2553   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2554   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2555   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2556   /* get global ordering of local dofs */
2557   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2558   if (local_dofs_mult) {
2559     for (i=0;i<n_local_dofs;i++) {
2560       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2561     }
2562   } else {
2563     for (i=0;i<n_local_dofs;i++) {
2564       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2565     }
2566   }
2567   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2568   /* free workspace */
2569   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2570   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2571   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2572   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2573   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2574   /* return pointer to global ordering of local dofs */
2575   *global_numbering_subset = temp_global_dofs;
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 #undef __FUNCT__
2580 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2581 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2582 {
2583   PetscInt       i,j;
2584   PetscScalar    *alphas;
2585   PetscErrorCode ierr;
2586 
2587   PetscFunctionBegin;
2588   /* this implements stabilized Gram-Schmidt */
2589   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2590   for (i=0;i<n;i++) {
2591     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2592     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2593     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2594   }
2595   ierr = PetscFree(alphas);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 #undef __FUNCT__
2600 #define __FUNCT__ "MatISGetMPIXAIJ"
2601 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatReuse reuse, Mat *M)
2602 {
2603   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2604   /* info on mat */
2605   /* ISLocalToGlobalMapping rmapping,cmapping; */
2606   PetscInt               bs,rows,cols;
2607   PetscInt               lrows,lcols;
2608   PetscInt               local_rows,local_cols;
2609   PetscBool              isdense,issbaij,issbaij_red;
2610   /* values insertion */
2611   PetscScalar            *array;
2612   PetscInt               *local_indices,*global_indices;
2613   /* work */
2614   PetscInt               i,j,index_row;
2615   PetscErrorCode         ierr;
2616 
2617   PetscFunctionBegin;
2618   /* MISSING CHECKS
2619     - rectangular case not covered (it is not allowed by MATIS)
2620   */
2621   /* get info from mat */
2622   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2623   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2624   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2625   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2626   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2627   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2628 
2629   /* work */
2630   ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr);
2631   for (i=0;i<local_rows;i++) local_indices[i]=i;
2632   /* map indices of local mat to global values */
2633   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2634   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2635   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2636 
2637   if (issbaij) {
2638     ierr = MatGetRowUpperTriangular(matis->A);CHKERRQ(ierr);
2639   }
2640 
2641   if (reuse == MAT_INITIAL_MATRIX) {
2642     Mat         new_mat;
2643     MatType     new_mat_type;
2644     Vec         vec_dnz,vec_onz;
2645     PetscScalar *my_dnz,*my_onz;
2646     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2647     PetscInt    index_col,owner;
2648     PetscMPIInt nsubdomains;
2649 
2650     /* determining new matrix type */
2651     ierr = MPI_Allreduce(&issbaij,&issbaij_red,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2652     if (issbaij_red) {
2653       new_mat_type = MATSBAIJ;
2654     } else {
2655       if (bs>1) {
2656         new_mat_type = MATBAIJ;
2657       } else {
2658         new_mat_type = MATAIJ;
2659       }
2660     }
2661 
2662     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2663     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2664     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2665     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2666     ierr = MatSetType(new_mat,new_mat_type);CHKERRQ(ierr);
2667     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2668     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2669 
2670     /*
2671       preallocation
2672     */
2673 
2674     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2675     /*
2676        Some vectors are needed to sum up properly on shared interface dofs.
2677        Preallocation macros cannot do the job.
2678        Note that preallocation is not exact, since it overestimates nonzeros
2679     */
2680     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2681     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2682     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2683     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2684     /* All processes need to compute entire row ownership */
2685     ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr);
2686     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2687     for (i=0;i<nsubdomains;i++) {
2688       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2689         row_ownership[j]=i;
2690       }
2691     }
2692 
2693     /*
2694        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2695        then, they will be summed up properly. This way, preallocation is always sufficient
2696     */
2697     ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr);
2698     ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr);
2699     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2700     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2701     /* preallocation as a MATAIJ */
2702     if (isdense) { /* special case for dense local matrices */
2703       for (i=0;i<local_rows;i++) {
2704         index_row = global_indices[i];
2705         for (j=i;j<local_rows;j++) {
2706           owner = row_ownership[index_row];
2707           index_col = global_indices[j];
2708           if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2709             my_dnz[i] += 1.0;
2710           } else { /* offdiag block */
2711             my_onz[i] += 1.0;
2712           }
2713           /* same as before, interchanging rows and cols */
2714           if (i != j) {
2715             owner = row_ownership[index_col];
2716             if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2717               my_dnz[j] += 1.0;
2718             } else {
2719               my_onz[j] += 1.0;
2720             }
2721           }
2722         }
2723       }
2724     } else {
2725       for (i=0;i<local_rows;i++) {
2726         PetscInt ncols;
2727         const PetscInt *cols;
2728         index_row = global_indices[i];
2729         ierr = MatGetRow(matis->A,i,&ncols,&cols,NULL);CHKERRQ(ierr);
2730         for (j=0;j<ncols;j++) {
2731           owner = row_ownership[index_row];
2732           index_col = global_indices[cols[j]];
2733           if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2734             my_dnz[i] += 1.0;
2735           } else { /* offdiag block */
2736             my_onz[i] += 1.0;
2737           }
2738           /* same as before, interchanging rows and cols */
2739           if (issbaij) {
2740             owner = row_ownership[index_col];
2741             if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2742               my_dnz[j] += 1.0;
2743             } else {
2744               my_onz[j] += 1.0;
2745             }
2746           }
2747         }
2748         ierr = MatRestoreRow(matis->A,i,&ncols,&cols,NULL);CHKERRQ(ierr);
2749       }
2750     }
2751     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2752     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2753     if (local_rows) { /* multilevel guard */
2754       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2755       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2756     }
2757     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2758     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2759     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2760     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2761     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2762     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2763     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2764 
2765     /* set computed preallocation in dnz and onz */
2766     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2767     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2768     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2769     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2770     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2771     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2772     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2773     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2774 
2775     /* Resize preallocation if overestimated */
2776     for (i=0;i<lrows;i++) {
2777       dnz[i] = PetscMin(dnz[i],lcols);
2778       onz[i] = PetscMin(onz[i],cols-lcols);
2779     }
2780     /* set preallocation */
2781     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2782     for (i=0;i<lrows/bs;i++) {
2783       dnz[i] = dnz[i*bs]/bs;
2784       onz[i] = onz[i*bs]/bs;
2785     }
2786     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2787     for (i=0;i<lrows/bs;i++) {
2788       dnz[i] = dnz[i]-i;
2789     }
2790     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2791     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2792     *M = new_mat;
2793   } else {
2794     PetscInt mbs,mrows,mcols;
2795     /* some checks */
2796     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2797     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2798     if (mrows != rows) {
2799       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2800     }
2801     if (mrows != rows) {
2802       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2803     }
2804     if (mbs != bs) {
2805       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2806     }
2807     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2808   }
2809   /* set local to global mappings */
2810   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2811   /* Set values */
2812   if (isdense) { /* special case for dense local matrices */
2813     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2814     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2815     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2816     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2817     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2818     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2819   } else { /* very basic values insertion for all other matrix types */
2820     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2821     for (i=0;i<local_rows;i++) {
2822       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2823       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2824       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2825       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2826       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2827       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2828     }
2829     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2830   }
2831   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2832   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2833   if (isdense) {
2834     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2835   }
2836   if (issbaij) {
2837     ierr = MatRestoreRowUpperTriangular(matis->A);CHKERRQ(ierr);
2838   }
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatISGetSubassemblingPattern"
2844 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2845 {
2846   Mat             subdomain_adj;
2847   IS              new_ranks,ranks_send_to;
2848   MatPartitioning partitioner;
2849   Mat_IS          *matis;
2850   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2851   PetscInt        prank;
2852   PetscMPIInt     size,rank,color;
2853   PetscInt        *xadj,*adjncy,*oldranks;
2854   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2855   PetscInt        i,j,local_size,threshold=0;
2856   PetscErrorCode  ierr;
2857   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2858   PetscSubcomm    subcomm;
2859 
2860   PetscFunctionBegin;
2861   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2862   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2863   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2864 
2865   /* Get info on mapping */
2866   matis = (Mat_IS*)(mat->data);
2867   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2868   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2869 
2870   /* build local CSR graph of subdomains' connectivity */
2871   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2872   xadj[0] = 0;
2873   xadj[1] = PetscMax(n_neighs-1,0);
2874   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2875   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2876 
2877   if (threshold) {
2878     PetscInt* count,min_threshold;
2879     ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr);
2880     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2881     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2882       for (j=0;j<n_shared[i];j++) {
2883         count[shared[i][j]] += 1;
2884       }
2885     }
2886     /* adapt threshold since we dont want to lose significant connections */
2887     min_threshold = n_neighs;
2888     for (i=1;i<n_neighs;i++) {
2889       for (j=0;j<n_shared[i];j++) {
2890         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2891       }
2892     }
2893     threshold = PetscMax(min_threshold+1,threshold);
2894     xadj[1] = 0;
2895     for (i=1;i<n_neighs;i++) {
2896       for (j=0;j<n_shared[i];j++) {
2897         if (count[shared[i][j]] < threshold) {
2898           adjncy[xadj[1]] = neighs[i];
2899           adjncy_wgt[xadj[1]] = n_shared[i];
2900           xadj[1]++;
2901           break;
2902         }
2903       }
2904     }
2905     ierr = PetscFree(count);CHKERRQ(ierr);
2906   } else {
2907     if (xadj[1]) {
2908       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2909       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2910     }
2911   }
2912   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2913   if (use_square) {
2914     for (i=0;i<xadj[1];i++) {
2915       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2916     }
2917   }
2918   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2919 
2920   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2921 
2922   /*
2923     Restrict work on active processes only.
2924   */
2925   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2926   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2927   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2928   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2929   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2930   if (color) {
2931     ierr = PetscFree(xadj);CHKERRQ(ierr);
2932     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2933     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2934   } else {
2935     PetscInt coarsening_ratio;
2936     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2937     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2938     prank = rank;
2939     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2940     /*
2941     for (i=0;i<size;i++) {
2942       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2943     }
2944     */
2945     for (i=0;i<xadj[1];i++) {
2946       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2947     }
2948     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2949     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2950     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2951 
2952     /* Partition */
2953     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2954     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2955     if (use_vwgt) {
2956       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2957       v_wgt[0] = local_size;
2958       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2959     }
2960     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2961     coarsening_ratio = size/n_subdomains;
2962     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2963     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2964     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2965     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2966     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2967 
2968     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2969     if (contiguous) {
2970       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2971     } else {
2972       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2973     }
2974     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2975     /* clean up */
2976     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2977     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2978     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2979     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2980   }
2981   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2982 
2983   /* assemble parallel IS for sends */
2984   i = 1;
2985   if (color) i=0;
2986   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2987 
2988   /* get back IS */
2989   *is_sends = ranks_send_to;
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2994 
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatISSubassemble"
2997 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2998 {
2999   Mat                    local_mat;
3000   Mat_IS                 *matis;
3001   IS                     is_sends_internal;
3002   PetscInt               rows,cols;
3003   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3004   PetscBool              ismatis,isdense,destroy_mat;
3005   ISLocalToGlobalMapping l2gmap;
3006   PetscInt*              l2gmap_indices;
3007   const PetscInt*        is_indices;
3008   MatType                new_local_type;
3009   /* buffers */
3010   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3011   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3012   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3013   /* MPI */
3014   MPI_Comm               comm,comm_n;
3015   PetscSubcomm           subcomm;
3016   PetscMPIInt            n_sends,n_recvs,commsize;
3017   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3018   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3019   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3020   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3021   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3022   PetscErrorCode         ierr;
3023 
3024   PetscFunctionBegin;
3025   /* TODO: add missing checks */
3026   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3027   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3028   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3029   PetscValidLogicalCollectiveInt(mat,nis,7);
3030   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3031   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3032   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3033   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3034   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3035   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3036   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3037   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3038     PetscInt mrows,mcols,mnrows,mncols;
3039     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3040     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3041     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3042     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3043     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3044     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3045   }
3046   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3047   PetscValidLogicalCollectiveInt(mat,bs,0);
3048   /* prepare IS for sending if not provided */
3049   if (!is_sends) {
3050     PetscBool pcontig = PETSC_TRUE;
3051     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3052     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3053   } else {
3054     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3055     is_sends_internal = is_sends;
3056   }
3057 
3058   /* get pointer of MATIS data */
3059   matis = (Mat_IS*)mat->data;
3060 
3061   /* get comm */
3062   comm = PetscObjectComm((PetscObject)mat);
3063 
3064   /* compute number of sends */
3065   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3066   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3067 
3068   /* compute number of receives */
3069   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3070   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3071   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3072   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3073   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3074   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3075   ierr = PetscFree(iflags);CHKERRQ(ierr);
3076 
3077   /* restrict comm if requested */
3078   subcomm = 0;
3079   destroy_mat = PETSC_FALSE;
3080   if (restrict_comm) {
3081     PetscMPIInt color,rank,subcommsize;
3082     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3083     color = 0;
3084     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3085     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3086     subcommsize = commsize - subcommsize;
3087     /* check if reuse has been requested */
3088     if (reuse == MAT_REUSE_MATRIX) {
3089       if (*mat_n) {
3090         PetscMPIInt subcommsize2;
3091         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3092         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3093         comm_n = PetscObjectComm((PetscObject)*mat_n);
3094       } else {
3095         comm_n = PETSC_COMM_SELF;
3096       }
3097     } else { /* MAT_INITIAL_MATRIX */
3098       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3099       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3100       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3101       comm_n = subcomm->comm;
3102     }
3103     /* flag to destroy *mat_n if not significative */
3104     if (color) destroy_mat = PETSC_TRUE;
3105   } else {
3106     comm_n = comm;
3107   }
3108 
3109   /* prepare send/receive buffers */
3110   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3111   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3112   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3113   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3114   if (nis) {
3115     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3116     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3117   }
3118 
3119   /* Get data from local matrices */
3120   if (!isdense) {
3121     /* TODO: See below some guidelines on how to prepare the local buffers */
3122     /*
3123        send_buffer_vals should contain the raw values of the local matrix
3124        send_buffer_idxs should contain:
3125        - MatType_PRIVATE type
3126        - PetscInt        size_of_l2gmap
3127        - PetscInt        global_row_indices[size_of_l2gmap]
3128        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3129     */
3130   } else {
3131     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3132     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3133     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
3134     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3135     send_buffer_idxs[1] = i;
3136     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3137     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3138     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3139     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3140     for (i=0;i<n_sends;i++) {
3141       ilengths_vals[is_indices[i]] = len*len;
3142       ilengths_idxs[is_indices[i]] = len+2;
3143     }
3144   }
3145   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3146   /* additional is (if any) */
3147   if (nis) {
3148     PetscMPIInt psum;
3149     PetscInt j;
3150     for (j=0,psum=0;j<nis;j++) {
3151       PetscInt plen;
3152       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3153       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3154       psum += len+1; /* indices + lenght */
3155     }
3156     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3157     for (j=0,psum=0;j<nis;j++) {
3158       PetscInt plen;
3159       const PetscInt *is_array_idxs;
3160       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3161       send_buffer_idxs_is[psum] = plen;
3162       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3163       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3164       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3165       psum += plen+1; /* indices + lenght */
3166     }
3167     for (i=0;i<n_sends;i++) {
3168       ilengths_idxs_is[is_indices[i]] = psum;
3169     }
3170     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3171   }
3172 
3173   buf_size_idxs = 0;
3174   buf_size_vals = 0;
3175   buf_size_idxs_is = 0;
3176   for (i=0;i<n_recvs;i++) {
3177     buf_size_idxs += (PetscInt)olengths_idxs[i];
3178     buf_size_vals += (PetscInt)olengths_vals[i];
3179     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3180   }
3181   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3182   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3183   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3184 
3185   /* get new tags for clean communications */
3186   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3187   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3188   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3189 
3190   /* allocate for requests */
3191   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3192   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3193   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3194   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3195   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3196   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3197 
3198   /* communications */
3199   ptr_idxs = recv_buffer_idxs;
3200   ptr_vals = recv_buffer_vals;
3201   ptr_idxs_is = recv_buffer_idxs_is;
3202   for (i=0;i<n_recvs;i++) {
3203     source_dest = onodes[i];
3204     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3205     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3206     ptr_idxs += olengths_idxs[i];
3207     ptr_vals += olengths_vals[i];
3208     if (nis) {
3209       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3210       ptr_idxs_is += olengths_idxs_is[i];
3211     }
3212   }
3213   for (i=0;i<n_sends;i++) {
3214     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3215     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3216     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3217     if (nis) {
3218       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3219     }
3220   }
3221   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3222   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3223 
3224   /* assemble new l2g map */
3225   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3226   ptr_idxs = recv_buffer_idxs;
3227   buf_size_idxs = 0;
3228   for (i=0;i<n_recvs;i++) {
3229     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3230     ptr_idxs += olengths_idxs[i];
3231   }
3232   ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr);
3233   ptr_idxs = recv_buffer_idxs;
3234   buf_size_idxs = 0;
3235   for (i=0;i<n_recvs;i++) {
3236     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3237     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3238     ptr_idxs += olengths_idxs[i];
3239   }
3240   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3241   ierr = ISLocalToGlobalMappingCreate(comm,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3242   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3243 
3244   /* infer new local matrix type from received local matrices type */
3245   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3246   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3247   if (n_recvs) {
3248     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3249     ptr_idxs = recv_buffer_idxs;
3250     for (i=0;i<n_recvs;i++) {
3251       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3252         new_local_type_private = MATAIJ_PRIVATE;
3253         break;
3254       }
3255       ptr_idxs += olengths_idxs[i];
3256     }
3257     switch (new_local_type_private) {
3258       case MATDENSE_PRIVATE:
3259         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3260           new_local_type = MATSEQAIJ;
3261           bs = 1;
3262         } else { /* if I receive only 1 dense matrix */
3263           new_local_type = MATSEQDENSE;
3264           bs = 1;
3265         }
3266         break;
3267       case MATAIJ_PRIVATE:
3268         new_local_type = MATSEQAIJ;
3269         bs = 1;
3270         break;
3271       case MATBAIJ_PRIVATE:
3272         new_local_type = MATSEQBAIJ;
3273         break;
3274       case MATSBAIJ_PRIVATE:
3275         new_local_type = MATSEQSBAIJ;
3276         break;
3277       default:
3278         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3279         break;
3280     }
3281   } else { /* by default, new_local_type is seqdense */
3282     new_local_type = MATSEQDENSE;
3283     bs = 1;
3284   }
3285 
3286   /* create MATIS object if needed */
3287   if (reuse == MAT_INITIAL_MATRIX) {
3288     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3289     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3290   } else {
3291     /* it also destroys the local matrices */
3292     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3293   }
3294   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3295   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3296   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3297   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3298 
3299   /* set values */
3300   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3301   ptr_vals = recv_buffer_vals;
3302   ptr_idxs = recv_buffer_idxs;
3303   for (i=0;i<n_recvs;i++) {
3304     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3305       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3306       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3307       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3308       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3309       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3310     } else {
3311       /* TODO */
3312     }
3313     ptr_idxs += olengths_idxs[i];
3314     ptr_vals += olengths_vals[i];
3315   }
3316   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3318   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3319   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3320 
3321 #if 0
3322   if (!restrict_comm) { /* check */
3323     Vec       lvec,rvec;
3324     PetscReal infty_error;
3325 
3326     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3327     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3328     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3329     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3330     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3331     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3332     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3333     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3334     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3335   }
3336 #endif
3337 
3338   /* assemble new additional is (if any) */
3339   if (nis) {
3340     PetscInt **temp_idxs,*count_is,j,psum;
3341 
3342     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3343     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3344     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3345     ptr_idxs = recv_buffer_idxs_is;
3346     psum = 0;
3347     for (i=0;i<n_recvs;i++) {
3348       for (j=0;j<nis;j++) {
3349         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3350         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3351         psum += plen;
3352         ptr_idxs += plen+1; /* shift pointer to received data */
3353       }
3354     }
3355     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3356     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3357     for (i=1;i<nis;i++) {
3358       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3359     }
3360     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3361     ptr_idxs = recv_buffer_idxs_is;
3362     for (i=0;i<n_recvs;i++) {
3363       for (j=0;j<nis;j++) {
3364         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3365         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3366         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3367         ptr_idxs += plen+1; /* shift pointer to received data */
3368       }
3369     }
3370     for (i=0;i<nis;i++) {
3371       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3372       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3373       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3374     }
3375     ierr = PetscFree(count_is);CHKERRQ(ierr);
3376     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3377     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3378   }
3379   /* free workspace */
3380   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3381   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3382   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3383   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3384   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3385   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3386   if (isdense) {
3387     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3388     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3389   } else {
3390     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3391   }
3392   if (nis) {
3393     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3394     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3395   }
3396   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3397   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3398   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3399   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3400   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3401   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3402   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3403   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3404   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3405   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3406   ierr = PetscFree(onodes);CHKERRQ(ierr);
3407   if (nis) {
3408     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3409     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3410     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3411   }
3412   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3413   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3414     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3415     for (i=0;i<nis;i++) {
3416       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3417     }
3418   }
3419   PetscFunctionReturn(0);
3420 }
3421 
3422 /* temporary hack into ksp private data structure */
3423 #include <petsc-private/kspimpl.h>
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3427 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3428 {
3429   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3430   PC_IS                  *pcis = (PC_IS*)pc->data;
3431   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3432   MatNullSpace           CoarseNullSpace=NULL;
3433   ISLocalToGlobalMapping coarse_islg;
3434   IS                     coarse_is,*isarray;
3435   PetscInt               i,im_active=-1,active_procs=-1;
3436   PetscInt               nis,nisdofs,nisneu;
3437   PC                     pc_temp;
3438   PCType                 coarse_pc_type;
3439   KSPType                coarse_ksp_type;
3440   PetscBool              multilevel_requested,multilevel_allowed;
3441   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3442   Mat                    t_coarse_mat_is;
3443   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3444   PetscMPIInt            all_procs;
3445   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3446   PetscBool              compute_vecs = PETSC_FALSE;
3447   PetscErrorCode         ierr;
3448 
3449   PetscFunctionBegin;
3450   /* Assign global numbering to coarse dofs */
3451   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3452     compute_vecs = PETSC_TRUE;
3453     PetscInt ocoarse_size;
3454     ocoarse_size = pcbddc->coarse_size;
3455     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3456     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3457     /* see if we can avoid some work */
3458     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3459       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3460         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3461         coarse_reuse = PETSC_FALSE;
3462       } else { /* we can safely reuse already computed coarse matrix */
3463         coarse_reuse = PETSC_TRUE;
3464       }
3465     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3466       coarse_reuse = PETSC_FALSE;
3467     }
3468     /* reset any subassembling information */
3469     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3470     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3471   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3472     coarse_reuse = PETSC_TRUE;
3473   }
3474 
3475   /* count "active" (i.e. with positive local size) and "void" processes */
3476   im_active = !!(pcis->n);
3477   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3478   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3479   void_procs = all_procs-active_procs;
3480   csin_type_simple = PETSC_TRUE;
3481   if (pcbddc->current_level) {
3482     csin_ml = PETSC_TRUE;
3483     ncoarse_ml = void_procs;
3484     csin_ds = PETSC_TRUE;
3485     ncoarse_ds = void_procs;
3486     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3487   } else {
3488     csin_ml = PETSC_FALSE;
3489     ncoarse_ml = all_procs;
3490     if (void_procs) {
3491       csin_ds = PETSC_TRUE;
3492       ncoarse_ds = void_procs;
3493       csin_type_simple = PETSC_FALSE;
3494     } else {
3495       csin_ds = PETSC_FALSE;
3496       ncoarse_ds = all_procs;
3497     }
3498   }
3499 
3500   /*
3501     test if we can go multilevel: three conditions must be satisfied:
3502     - we have not exceeded the number of levels requested
3503     - we can actually subassemble the active processes
3504     - we can find a suitable number of MPI processes where we can place the subassembled problem
3505   */
3506   multilevel_allowed = PETSC_FALSE;
3507   multilevel_requested = PETSC_FALSE;
3508   if (pcbddc->current_level < pcbddc->max_levels) {
3509     multilevel_requested = PETSC_TRUE;
3510     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3511       multilevel_allowed = PETSC_FALSE;
3512     } else {
3513       multilevel_allowed = PETSC_TRUE;
3514     }
3515   }
3516   /* determine number of process partecipating to coarse solver */
3517   if (multilevel_allowed) {
3518     ncoarse = ncoarse_ml;
3519     csin = csin_ml;
3520   } else {
3521     ncoarse = ncoarse_ds;
3522     csin = csin_ds;
3523   }
3524 
3525   /* creates temporary l2gmap and IS for coarse indexes */
3526   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3527   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3528 
3529   /* creates temporary MATIS object for coarse matrix */
3530   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3531 #if 0
3532   {
3533     PetscViewer viewer;
3534     char filename[256];
3535     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3536     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3537     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3538     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3539     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3540   }
3541 #endif
3542   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3543   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3544   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3545   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3546   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3547 
3548   /* compute dofs splitting and neumann boundaries for coarse dofs */
3549   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3550     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3551     const PetscInt         *idxs;
3552     ISLocalToGlobalMapping tmap;
3553 
3554     /* create map between primal indices (in local representative ordering) and local primal numbering */
3555     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3556     /* allocate space for temporary storage */
3557     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3558     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3559     /* allocate for IS array */
3560     nisdofs = pcbddc->n_ISForDofsLocal;
3561     nisneu = !!pcbddc->NeumannBoundariesLocal;
3562     nis = nisdofs + nisneu;
3563     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3564     /* dofs splitting */
3565     for (i=0;i<nisdofs;i++) {
3566       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3567       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3568       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3569       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3570       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3571       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3572       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3573       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3574     }
3575     /* neumann boundaries */
3576     if (pcbddc->NeumannBoundariesLocal) {
3577       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3578       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3579       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3580       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3581       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3582       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3583       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3584       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3585     }
3586     /* free memory */
3587     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3588     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3589     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3590   } else {
3591     nis = 0;
3592     nisdofs = 0;
3593     nisneu = 0;
3594     isarray = NULL;
3595   }
3596   /* destroy no longer needed map */
3597   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3598 
3599   /* restrict on coarse candidates (if needed) */
3600   coarse_mat_is = NULL;
3601   if (csin) {
3602     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3603       PetscInt j,tissize,*nisindices;
3604       PetscInt *coarse_candidates;
3605       const PetscInt* tisindices;
3606       /* get coarse candidates' ranks in pc communicator */
3607       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3608       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3609       for (i=0,j=0;i<all_procs;i++) {
3610         if (!coarse_candidates[i]) {
3611           coarse_candidates[j]=i;
3612           j++;
3613         }
3614       }
3615       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3616       /* get a suitable subassembling pattern */
3617       if (csin_type_simple) {
3618         PetscMPIInt rank;
3619         PetscInt    issize,isidx;
3620         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3621         if (im_active) {
3622           issize = 1;
3623           isidx = (PetscInt)rank;
3624         } else {
3625           issize = 0;
3626           isidx = -1;
3627         }
3628         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3629       } else {
3630         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3631       }
3632       if (pcbddc->dbg_flag) {
3633         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3634         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3635         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3636         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3637         for (i=0;i<j;i++) {
3638           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3639         }
3640         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3641         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3642       }
3643       /* shift the pattern on coarse candidates */
3644       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3645       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3646       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3647       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3648       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3649       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3650       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3651     }
3652     if (pcbddc->dbg_flag) {
3653       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3654       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3655       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3656       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3657     }
3658     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3659     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3660   } else {
3661     if (pcbddc->dbg_flag) {
3662       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3663       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3664       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3665     }
3666     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3667     coarse_mat_is = t_coarse_mat_is;
3668   }
3669 
3670   /* create local to global scatters for coarse problem */
3671   if (compute_vecs) {
3672     PetscInt lrows;
3673     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3674     if (coarse_mat_is) {
3675       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3676     } else {
3677       lrows = 0;
3678     }
3679     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3680     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3681     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3682     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3683     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3684   }
3685   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3686   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3687 
3688   /* set defaults for coarse KSP and PC */
3689   if (multilevel_allowed) {
3690     coarse_ksp_type = KSPRICHARDSON;
3691     coarse_pc_type = PCBDDC;
3692   } else {
3693     coarse_ksp_type = KSPPREONLY;
3694     coarse_pc_type = PCREDUNDANT;
3695   }
3696 
3697   /* print some info if requested */
3698   if (pcbddc->dbg_flag) {
3699     if (!multilevel_allowed) {
3700       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3701       if (multilevel_requested) {
3702         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3703       } else if (pcbddc->max_levels) {
3704         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3705       }
3706       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3707     }
3708   }
3709 
3710   /* create the coarse KSP object only once with defaults */
3711   if (coarse_mat_is) {
3712     MatReuse coarse_mat_reuse;
3713     PetscViewer dbg_viewer;
3714     if (pcbddc->dbg_flag) {
3715       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3716       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3717     }
3718     if (!pcbddc->coarse_ksp) {
3719       char prefix[256],str_level[16];
3720       size_t len;
3721       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3722       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3723       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3724       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3725       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3726       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3727       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3728       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3729       /* prefix */
3730       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3731       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3732       if (!pcbddc->current_level) {
3733         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3734         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3735       } else {
3736         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3737         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3738         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3739         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3740         *(prefix+len)='\0';
3741         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3742         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3743       }
3744       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3745       /* allow user customization */
3746       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3747       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3748     }
3749 
3750     /* get some info after set from options */
3751     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3752     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3753     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3754     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3755     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3756       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3757       isbddc = PETSC_FALSE;
3758     }
3759     if (isredundant) {
3760       KSP inner_ksp;
3761       PC inner_pc;
3762       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3763       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3764       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3765     }
3766 
3767     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3768     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3769     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3770     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3771     if (nisdofs) {
3772       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3773       for (i=0;i<nisdofs;i++) {
3774         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3775       }
3776     }
3777     if (nisneu) {
3778       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3779       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3780     }
3781 
3782     /* assemble coarse matrix */
3783     if (coarse_reuse) {
3784       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3785       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3786       coarse_mat_reuse = MAT_REUSE_MATRIX;
3787     } else {
3788       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3789     }
3790     if (isbddc || isnn) {
3791       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3792         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3793         if (pcbddc->dbg_flag) {
3794           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3795           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3796           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3797           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3798         }
3799       }
3800       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3801     } else {
3802       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3803     }
3804     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3805 
3806     /* propagate symmetry info to coarse matrix */
3807     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3808     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3809 
3810     /* set operators */
3811     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3812     if (pcbddc->dbg_flag) {
3813       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3814     }
3815   } else { /* processes non partecipating to coarse solver (if any) */
3816     coarse_mat = 0;
3817   }
3818   ierr = PetscFree(isarray);CHKERRQ(ierr);
3819 #if 0
3820   {
3821     PetscViewer viewer;
3822     char filename[256];
3823     sprintf(filename,"coarse_mat.m");
3824     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3825     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3826     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3827     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3828   }
3829 #endif
3830 
3831   /* Compute coarse null space (special handling by BDDC only) */
3832   if (pcbddc->NullSpace) {
3833     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3834   }
3835 
3836   if (pcbddc->coarse_ksp) {
3837     Vec crhs,csol;
3838     PetscBool ispreonly;
3839     if (CoarseNullSpace) {
3840       if (isbddc) {
3841         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3842       } else {
3843         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3844       }
3845     }
3846     /* setup coarse ksp */
3847     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3848     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3849     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3850     /* hack */
3851     if (!csol) {
3852       ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3853     }
3854     if (!crhs) {
3855       ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3856     }
3857     /* Check coarse problem if in debug mode or if solving with an iterative method */
3858     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3859     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3860       KSP       check_ksp;
3861       KSPType   check_ksp_type;
3862       PC        check_pc;
3863       Vec       check_vec,coarse_vec;
3864       PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3865       PetscInt  its;
3866       PetscBool compute_eigs;
3867       PetscReal *eigs_r,*eigs_c;
3868       PetscInt  neigs;
3869       const char *prefix;
3870 
3871       /* Create ksp object suitable for estimation of extreme eigenvalues */
3872       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3873       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3874       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3875       if (ispreonly) {
3876         check_ksp_type = KSPPREONLY;
3877         compute_eigs = PETSC_FALSE;
3878       } else {
3879         check_ksp_type = KSPGMRES;
3880         compute_eigs = PETSC_TRUE;
3881       }
3882       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3883       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3884       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3885       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3886       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3887       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3888       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3889       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3890       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3891       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3892       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3893       /* create random vec */
3894       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3895       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3896       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3897       if (CoarseNullSpace) {
3898         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3899       }
3900       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3901       /* solve coarse problem */
3902       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3903       if (CoarseNullSpace) {
3904         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3905       }
3906       /* set eigenvalue estimation if preonly has not been requested */
3907       if (compute_eigs) {
3908         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3909         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3910         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3911         lambda_max = eigs_r[neigs-1];
3912         lambda_min = eigs_r[0];
3913         if (pcbddc->use_coarse_estimates) {
3914           if (lambda_max>lambda_min) {
3915             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3916             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3917           }
3918         }
3919       }
3920 
3921       /* check coarse problem residual error */
3922       if (pcbddc->dbg_flag) {
3923         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3924         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3925         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3926         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3927         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3928         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3929         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3930         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3931         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3932         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3933         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3934         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3935         if (compute_eigs) {
3936           PetscReal lambda_max_s,lambda_min_s;
3937           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3938           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3939           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3940           for (i=0;i<neigs;i++) {
3941             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3942           }
3943         }
3944         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3945         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3946       }
3947       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3948       if (compute_eigs) {
3949         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3950         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3951       }
3952     }
3953   }
3954   /* print additional info */
3955   if (pcbddc->dbg_flag) {
3956     /* waits until all processes reaches this point */
3957     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3958     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3959     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3960   }
3961 
3962   /* free memory */
3963   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3964   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 #undef __FUNCT__
3969 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3970 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3971 {
3972   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3973   PC_IS*         pcis = (PC_IS*)pc->data;
3974   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3975   PetscInt       i,coarse_size;
3976   PetscInt       *local_primal_indices;
3977   PetscErrorCode ierr;
3978 
3979   PetscFunctionBegin;
3980   /* Compute global number of coarse dofs */
3981   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3982     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3983   }
3984   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3985 
3986   /* check numbering */
3987   if (pcbddc->dbg_flag) {
3988     PetscScalar coarsesum,*array;
3989     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
3990 
3991     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3993     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3994     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3995     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3996     for (i=0;i<pcbddc->local_primal_size;i++) {
3997       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3998     }
3999     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4000     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4001     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4002     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4003     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4004     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4005     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4006     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4007     for (i=0;i<pcis->n;i++) {
4008       if (array[i] == 1.0) {
4009         set_error = PETSC_TRUE;
4010         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4011       }
4012     }
4013     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4014     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4015     for (i=0;i<pcis->n;i++) {
4016       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4017     }
4018     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4019     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4020     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4021     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4022     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4023     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4024     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4025       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4026       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4027       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4028       for (i=0;i<pcbddc->local_primal_size;i++) {
4029         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4030       }
4031       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4032     }
4033     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4034     if (set_error_reduced) {
4035       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4036     }
4037   }
4038   /* get back data */
4039   *coarse_size_n = coarse_size;
4040   *local_primal_indices_n = local_primal_indices;
4041   PetscFunctionReturn(0);
4042 }
4043 
4044 #undef __FUNCT__
4045 #define __FUNCT__ "PCBDDCGlobalToLocal"
4046 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4047 {
4048   IS             localis_t;
4049   PetscInt       i,lsize,*idxs,n;
4050   PetscScalar    *vals;
4051   PetscErrorCode ierr;
4052 
4053   PetscFunctionBegin;
4054   /* get indices in local ordering exploiting local to global map */
4055   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4056   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
4057   for (i=0;i<lsize;i++) vals[i] = 1.0;
4058   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4059   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4060   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4061   if (idxs) { /* multilevel guard */
4062     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4063   }
4064   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4065   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4066   ierr = PetscFree(vals);CHKERRQ(ierr);
4067   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4068   /* now compute set in local ordering */
4069   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4070   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4071   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4072   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4073   for (i=0,lsize=0;i<n;i++) {
4074     if (PetscRealPart(vals[i]) > 0.5) {
4075       lsize++;
4076     }
4077   }
4078   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
4079   for (i=0,lsize=0;i<n;i++) {
4080     if (PetscRealPart(vals[i]) > 0.5) {
4081       idxs[lsize++] = i;
4082     }
4083   }
4084   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4085   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4086   *localis = localis_t;
4087   PetscFunctionReturn(0);
4088 }
4089