xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision fb180af422914b0a76b4b5b1d69c7e421c5d76e8)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
23 
24   /* Change global null space passed in by the user if change of basis has been requested */
25   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
26     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
27   }
28 
29   /*
30      Setup local correction and local part of coarse basis.
31      Gives back the dense local part of the coarse matrix in column major ordering
32   */
33   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
34 
35   /* Compute total number of coarse nodes and setup coarse solver */
36   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
37 
38   /* free */
39   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "PCBDDCResetCustomization"
45 PetscErrorCode PCBDDCResetCustomization(PC pc)
46 {
47   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
48   PetscInt       i;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
57   for (i=0;i<pcbddc->n_ISForDofs;i++) {
58     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
59   }
60   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
61   pcbddc->n_ISForDofs = 0;
62   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
63   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
115 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
116 {
117   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
118   PC_IS          *pcis = (PC_IS*)pc->data;
119   VecType        impVecType;
120   PetscInt       n_vertices,n_constraints,local_primal_size,n_R,old_size;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   if (!pcbddc->ConstraintMatrix) {
125     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
126   }
127   /* get sizes */
128   ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr);
129   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
130   n_constraints = local_primal_size - n_vertices;
131   n_R = pcis->n-n_vertices;
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   /* local work vectors (try to avoid unneeded work)*/
134   /* R nodes */
135   old_size = -1;
136   if (pcbddc->vec1_R) {
137     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
138   }
139   if (n_R != old_size) {
140     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
146   }
147   /* local primal dofs */
148   old_size = -1;
149   if (pcbddc->vec1_P) {
150     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
151   }
152   if (local_primal_size != old_size) {
153     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
156     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
157   }
158   /* local explicit constraints */
159   old_size = -1;
160   if (pcbddc->vec1_C) {
161     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
162   }
163   if (n_constraints && n_constraints != old_size) {
164     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
167     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCSetUpCorrection"
174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175 {
176   PetscErrorCode         ierr;
177   /* pointers to pcis and pcbddc */
178   PC_IS*                 pcis = (PC_IS*)pc->data;
179   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180   /* submatrices of local problem */
181   Mat                    A_RV,A_VR,A_VV;
182   /* working matrices */
183   Mat                    M1,M2,M3,C_CR;
184   /* working vectors */
185   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186   /* additional working stuff */
187   IS                     is_aux;
188   ISLocalToGlobalMapping BtoNmap;
189   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
190   const PetscScalar      *array,*row_cmat_values;
191   const PetscInt         *row_cmat_indices,*idx_R_local;
192   PetscInt               *vertices,*idx_V_B,*auxindices;
193   PetscInt               n_vertices,n_constraints,size_of_constraint;
194   PetscInt               i,j,n_R,n_D,n_B;
195   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
196   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197   MatType                impMatType;
198   /* some shortcuts to scalars */
199   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
200   /* for debugging purposes */
201   PetscReal              *coarsefunctions_errors,*constraints_errors;
202 
203   PetscFunctionBegin;
204   /* get number of vertices and their local indices */
205   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
206   n_constraints = pcbddc->local_primal_size-n_vertices;
207   /* Set Non-overlapping dimensions */
208   n_B = pcis->n_B; n_D = pcis->n - n_B;
209   n_R = pcis->n-n_vertices;
210 
211   /* Set types for local objects needed by BDDC precondtioner */
212   impMatType = MATSEQDENSE;
213 
214   /* Allocating some extra storage just to be safe */
215   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
216   for (i=0;i<pcis->n;i++) auxindices[i]=i;
217 
218   /* vertices in boundary numbering */
219   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
220   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
221   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
222   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
223   if (i != n_vertices) {
224     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
225   }
226 
227   /* Precompute stuffs needed for preprocessing and application of BDDC*/
228   if (n_constraints) {
229     /* see if we can save some allocations */
230     if (pcbddc->local_auxmat2) {
231       PetscInt on_R,on_constraints;
232       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
233       if (on_R != n_R || on_constraints != n_constraints) {
234         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
235         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
236       }
237     }
238     /* work vectors */
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
240     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
241     /* auxiliary matrices */
242     if (!pcbddc->local_auxmat2) {
243       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
244       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
245       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
246       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
247     }
248 
249     /* Extract constraints on R nodes: C_{CR}  */
250     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
251     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
252     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
253 
254     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
255     for (i=0;i<n_constraints;i++) {
256       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
257       /* Get row of constraint matrix in R numbering */
258       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
260       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
261       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
262       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
263       /* Solve for row of constraint matrix in R numbering */
264       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
265       /* Set values in local_auxmat2 */
266       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
268       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
269     }
270     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
273 
274     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
275     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
276     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
277     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
278     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
279     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
280     ierr = MatSetUp(M1);CHKERRQ(ierr);
281     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
282     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
283     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
284     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
285     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
286     ierr = MatDestroy(&M2);CHKERRQ(ierr);
287     ierr = MatDestroy(&M3);CHKERRQ(ierr);
288     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
289     if (!pcbddc->local_auxmat1) {
290       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     } else {
292       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
293     }
294   }
295 
296   /* Get submatrices from subdomain matrix */
297   if (n_vertices) {
298     PetscInt ibs,mbs;
299     PetscBool issbaij;
300     Mat newmat;
301 
302     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
303     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
304     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
305     if (ibs != mbs) { /* need to convert to SEQAIJ */
306       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
309       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
310       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
311     } else {
312       /* this is safe */
313       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
314       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
315       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
316         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
317         /* which of the two approaches is faster? */
318         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
320         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
321         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
322         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
323       } else {
324         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
325         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
326       }
327     }
328     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
329     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
330     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
331   }
332 
333   /* Matrix of coarse basis functions (local) */
334   if (pcbddc->coarse_phi_B) {
335     PetscInt on_B,on_primal;
336     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
337     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
338       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
339       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
340     }
341   }
342   if (pcbddc->coarse_phi_D) {
343     PetscInt on_D,on_primal;
344     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
345     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
346       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
347       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
348     }
349   }
350   if (!pcbddc->coarse_phi_B) {
351     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
352     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
353     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
354     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
355   }
356   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
357     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
358     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
359     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
360     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
361   }
362 
363   if (pcbddc->dbg_flag) {
364     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
365     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
366     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
367   }
368   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
369   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
370 
371   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
372 
373   /* vertices */
374   for (i=0;i<n_vertices;i++) {
375     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
376     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
377     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
378     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
379     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
380     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
381     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
382     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
384     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
385     if (n_constraints) {
386       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
387       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
388       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
389     }
390     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
391     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
392 
393     /* Set values in coarse basis function and subdomain part of coarse_mat */
394     /* coarse basis functions */
395     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
396     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
400     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
402     if (pcbddc->switch_static || pcbddc->dbg_flag) {
403       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
405       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
407       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408     }
409     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
410     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
412     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     if (n_constraints) {
414       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
417     }
418 
419     /* check */
420     if (pcbddc->dbg_flag) {
421       /* assemble subdomain vector on local nodes */
422       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
423       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
428       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
429       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
430       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
431       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
433       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
434       if (n_constraints) {
435         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
436         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
437         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
438       }
439       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
442       /* check saddle point solution */
443       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
446       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
447       /* shift by the identity matrix */
448       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
452     }
453   }
454 
455   /* constraints */
456   for (i=0;i<n_constraints;i++) {
457     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
458     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
459     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
460     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
461     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
462     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
463     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
464     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
465     if (n_vertices) {
466       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
467     }
468     /* Set values in coarse basis function and subdomain part of coarse_mat */
469     /* coarse basis functions */
470     j = i+n_vertices; /* don't touch this! */
471     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
472     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
475     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
476     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
477     if (pcbddc->switch_static || pcbddc->dbg_flag) {
478       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
481       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
482       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
483     }
484     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
485     if (n_vertices) {
486       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
487       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
488       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
489     }
490     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
491     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
492     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
493 
494     if (pcbddc->dbg_flag) {
495       /* assemble subdomain vector on nodes */
496       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
497       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
498       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
499       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
500       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
501       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
502       /* assemble subdomain vector of lagrange multipliers */
503       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
504       if (n_vertices) {
505         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
506         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
507         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
508       }
509       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
510       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
511       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
512       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
513       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
514       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
515       /* check saddle point solution */
516       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
517       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
518       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
519       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
520       /* shift by the identity matrix */
521       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
522       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
523       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
524       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
525     }
526   }
527   /* call assembling routines for local coarse basis */
528   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   if (pcbddc->switch_static || pcbddc->dbg_flag) {
531     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533   }
534 
535   /* compute other basis functions for non-symmetric problems */
536   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
537   if (!setsym || (setsym && !issym)) {
538     if (!pcbddc->coarse_psi_B) {
539       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
540       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
541       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
542       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
543     }
544     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
545       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
546       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
547       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
548       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
549     }
550     for (i=0;i<pcbddc->local_primal_size;i++) {
551       if (n_constraints) {
552         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
553         for (j=0;j<n_constraints;j++) {
554           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
555         }
556         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
557         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
558       }
559       if (i<n_vertices) {
560         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
561         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
562         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
563         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
564         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
565         if (n_constraints) {
566           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
567         }
568       } else {
569         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
570       }
571       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
572       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
573       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
574       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
575       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
576       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
577       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
578       if (i<n_vertices) {
579         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
580       }
581       if (pcbddc->switch_static || pcbddc->dbg_flag) {
582         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
583         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
584         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
585         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
586         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
587       }
588 
589       if (pcbddc->dbg_flag) {
590         /* assemble subdomain vector on nodes */
591         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
592         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
593         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
594         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
595         if (i<n_vertices) {
596           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
597         }
598         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
599         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
600         /* assemble subdomain vector of lagrange multipliers */
601         for (j=0;j<pcbddc->local_primal_size;j++) {
602           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
603         }
604         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
605         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
606         /* check saddle point solution */
607         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
608         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
609         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
610         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
611         /* shift by the identity matrix */
612         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
613         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
614         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
615         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
616       }
617     }
618     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     if (pcbddc->switch_static || pcbddc->dbg_flag) {
621       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
623     }
624   }
625   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
626   /* Checking coarse_sub_mat and coarse basis functios */
627   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
628   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
629   if (pcbddc->dbg_flag) {
630     Mat         coarse_sub_mat;
631     Mat         AUXMAT,TM1,TM2,TM3,TM4;
632     Mat         coarse_phi_D,coarse_phi_B;
633     Mat         coarse_psi_D,coarse_psi_B;
634     Mat         A_II,A_BB,A_IB,A_BI;
635     MatType     checkmattype=MATSEQAIJ;
636     PetscReal   real_value;
637 
638     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
639     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
640     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
641     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
642     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
643     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
644     if (pcbddc->coarse_psi_B) {
645       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
646       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
647     }
648     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
649 
650     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
651     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
652     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
653     if (pcbddc->coarse_psi_B) {
654       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
655       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
656       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
657       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
658       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
659       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
660       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
661       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
662       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
663       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
664       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
665       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
666     } else {
667       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
668       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
669       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
670       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
671       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
672       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
673       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
674       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
675     }
676     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
677     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
679     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
680     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
681     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
684     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
685     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
686     for (i=0;i<pcbddc->local_primal_size;i++) {
687       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
688     }
689     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
690     for (i=0;i<pcbddc->local_primal_size;i++) {
691       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
692     }
693     if (pcbddc->coarse_psi_B) {
694       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
695       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
696         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
697       }
698       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
699       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
700         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
701       }
702     }
703     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
706     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
707     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
710     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
711     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
712     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
713     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
714     if (pcbddc->coarse_psi_B) {
715       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
716       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
717     }
718     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
719     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
720     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
721     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
722   }
723   /* free memory */
724   if (n_vertices) {
725     ierr = PetscFree(vertices);CHKERRQ(ierr);
726     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
727     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
729     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
730     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
731   }
732   if (n_constraints) {
733     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
734     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
735     ierr = MatDestroy(&M1);CHKERRQ(ierr);
736     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
737   }
738   ierr = PetscFree(auxindices);CHKERRQ(ierr);
739   /* get back data */
740   *coarse_submat_vals_n = coarse_submat_vals;
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
746 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
747 {
748   PC_IS*            pcis = (PC_IS*)(pc->data);
749   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
750   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
751   PetscBool         issbaij,isbaij;
752   /* manage repeated solves */
753   MatReuse          reuse;
754   MatStructure      matstruct;
755   PetscErrorCode    ierr;
756 
757   PetscFunctionBegin;
758   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
759     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
760   }
761   /* get mat flags */
762   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
763   reuse = MAT_INITIAL_MATRIX;
764   if (pc->setupcalled) {
765     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
766     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
767     if (matstruct == SAME_NONZERO_PATTERN) {
768       reuse = MAT_REUSE_MATRIX;
769     } else {
770       reuse = MAT_INITIAL_MATRIX;
771     }
772   }
773   if (reuse == MAT_INITIAL_MATRIX) {
774     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
775     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
776     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
777     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
778     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
779   }
780 
781   /* transform local matrices if needed */
782   if (pcbddc->use_change_of_basis) {
783     Mat         change_mat_all;
784     PetscScalar *row_cmat_values;
785     PetscInt    *row_cmat_indices;
786     PetscInt    *nnz,*is_indices,*temp_indices;
787     PetscInt    i,j,k,n_D,n_B;
788 
789     /* Get Non-overlapping dimensions */
790     n_B = pcis->n_B;
791     n_D = pcis->n-n_B;
792 
793     /* compute nonzero structure of change of basis on all local nodes */
794     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
795     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
796     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
797     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
798     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
799     k=1;
800     for (i=0;i<n_B;i++) {
801       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
802       nnz[is_indices[i]]=j;
803       if (k < j) k = j;
804       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
805     }
806     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
807     /* assemble change of basis matrix on the whole set of local dofs */
808     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
809     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
810     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
811     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
812     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
813     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
814     for (i=0;i<n_D;i++) {
815       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
816     }
817     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
818     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
819     for (i=0;i<n_B;i++) {
820       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
821       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
822       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
823       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
824     }
825     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
826     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
827     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
828     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
829     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
830     if (!issbaij && !isbaij) {
831       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
832     } else {
833       Mat work_mat;
834       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
835       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
836       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
837     }
838     /*
839     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
840     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
841     */
842     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
843     ierr = PetscFree(nnz);CHKERRQ(ierr);
844     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
845   } else {
846     /* without change of basis, the local matrix is unchanged */
847     if (!pcbddc->local_mat) {
848       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
849       pcbddc->local_mat = matis->A;
850     }
851   }
852 
853   /* get submatrices */
854   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
855   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
856   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
857   if (!issbaij) {
858     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
859     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
860   } else {
861     Mat newmat;
862     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
863     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
864     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
865     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
872 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
873 {
874   PC_IS*         pcis = (PC_IS*)(pc->data);
875   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
876   IS             is_aux1,is_aux2;
877   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
878   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
879   PetscInt       vbs,bs;
880   PetscBT        bitmask;
881   PetscErrorCode ierr;
882 
883   PetscFunctionBegin;
884   /* No need to setup local scatters if primal space is unchanged */
885   if (!pcbddc->new_primal_space) {
886     PetscFunctionReturn(0);
887   }
888   /* destroy old objects */
889   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
890   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
891   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
892   /* Set Non-overlapping dimensions */
893   n_B = pcis->n_B; n_D = pcis->n - n_B;
894   /* get vertex indices from constraint matrix */
895   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
896 
897   /* create auxiliary bitmask */
898   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
899   for (i=0;i<n_vertices;i++) {
900     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
901   }
902 
903   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
904   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
905   for (i=0, n_R=0; i<pcis->n; i++) {
906     if (!PetscBTLookup(bitmask,i)) {
907       idx_R_local[n_R] = i;
908       n_R++;
909     }
910   }
911 
912   /* Block code */
913   vbs = 1;
914   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
915   if (bs>1 && !(n_vertices%bs)) {
916     PetscBool is_blocked = PETSC_TRUE;
917     PetscInt  *vary;
918     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
919     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
920     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
921     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
922     for (i=0; i<n_vertices; i++) {
923       if (vary[i]!=0 && vary[i]!=bs) {
924         is_blocked = PETSC_FALSE;
925         break;
926       }
927     }
928     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
929       vbs = bs;
930       for (i=0;i<n_R/vbs;i++) {
931         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
932       }
933     }
934     ierr = PetscFree(vary);CHKERRQ(ierr);
935   }
936   ierr = PetscFree(vertices);CHKERRQ(ierr);
937   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
938   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
939 
940   /* print some info if requested */
941   if (pcbddc->dbg_flag) {
942     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
944     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
947     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
948     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
949   }
950 
951   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
952   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
953   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
954   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
955   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
956   for (i=0; i<n_D; i++) {
957     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
958   }
959   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
960   for (i=0, j=0; i<n_R; i++) {
961     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
962       aux_array1[j++] = i;
963     }
964   }
965   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
966   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
967   for (i=0, j=0; i<n_B; i++) {
968     if (!PetscBTLookup(bitmask,is_indices[i])) {
969       aux_array2[j++] = i;
970     }
971   }
972   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
973   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
974   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
975   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
976   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
977 
978   if (pcbddc->switch_static || pcbddc->dbg_flag) {
979     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
980     for (i=0, j=0; i<n_R; i++) {
981       if (PetscBTLookup(bitmask,idx_R_local[i])) {
982         aux_array1[j++] = i;
983       }
984     }
985     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
986     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
987     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
988   }
989   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
990   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
997 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
998 {
999   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1000   PC_IS          *pcis = (PC_IS*)pc->data;
1001   PC             pc_temp;
1002   Mat            A_RR;
1003   Vec            vec1,vec2,vec3;
1004   MatStructure   matstruct;
1005   MatReuse       reuse;
1006   PetscScalar    m_one = -1.0;
1007   PetscReal      value;
1008   PetscInt       n_D,n_R,ibs,mbs;
1009   PetscBool      use_exact,use_exact_reduced,issbaij;
1010   PetscErrorCode ierr;
1011   /* prefixes stuff */
1012   char           dir_prefix[256],neu_prefix[256],str_level[3];
1013   size_t         len;
1014 
1015   PetscFunctionBegin;
1016   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1017 
1018   /* compute prefixes */
1019   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1020   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1021   if (!pcbddc->current_level) {
1022     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1023     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1024     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1025     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1026   } else {
1027     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1028     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1029     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1030     len -= 15; /* remove "pc_bddc_coarse_" */
1031     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1032     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1033     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1034     *(dir_prefix+len)='\0';
1035     *(neu_prefix+len)='\0';
1036     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1037     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1038     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1039     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1040   }
1041 
1042   /* DIRICHLET PROBLEM */
1043   /* Matrix for Dirichlet problem is pcis->A_II */
1044   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1045   if (!pcbddc->ksp_D) { /* create object if not yet build */
1046     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1047     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1048     /* default */
1049     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1050     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1051     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1052     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1053     if (issbaij) {
1054       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1055     } else {
1056       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1057     }
1058     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1059   }
1060   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1061   /* Allow user's customization */
1062   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1063   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1064   if (!n_D) {
1065     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1066     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1067   }
1068   /* Set Up KSP for Dirichlet problem of BDDC */
1069   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1070   /* set ksp_D into pcis data */
1071   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1072   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1073   pcis->ksp_D = pcbddc->ksp_D;
1074 
1075   /* NEUMANN PROBLEM */
1076   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1077   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1078   if (pcbddc->ksp_R) { /* already created ksp */
1079     PetscInt nn_R;
1080     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1081     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1082     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1083     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1084       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1085       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1086       reuse = MAT_INITIAL_MATRIX;
1087     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1088       if (pcbddc->new_primal_space) { /* we are not sure the matrix will have the same nonzero pattern */
1089         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1090         reuse = MAT_INITIAL_MATRIX;
1091       } else { /* safe to reuse the matrix */
1092         reuse = MAT_REUSE_MATRIX;
1093       }
1094     }
1095     /* last check */
1096     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1097       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1098       reuse = MAT_INITIAL_MATRIX;
1099     }
1100   } else { /* first time, so we need to create the matrix */
1101     reuse = MAT_INITIAL_MATRIX;
1102   }
1103   /* extract A_RR */
1104   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1105   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1106   if (ibs != mbs) {
1107     Mat newmat;
1108     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1109     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1110     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1111   } else {
1112     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1113   }
1114   if (!pcbddc->ksp_R) { /* create object if not present */
1115     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1116     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1117     /* default */
1118     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1119     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1120     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1121     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1122     if (issbaij) {
1123       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1124     } else {
1125       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1126     }
1127     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1128   }
1129   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1130   /* Allow user's customization */
1131   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1132   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1133   if (!n_R) {
1134     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1135     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1136   }
1137   /* Set Up KSP for Neumann problem of BDDC */
1138   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1139 
1140   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1141 
1142   /* Dirichlet */
1143   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1144   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1145   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1146   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1147   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1148   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1149   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1150   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1151   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1152   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1153   /* need to be adapted? */
1154   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1155   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1156   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1157   /* print info */
1158   if (pcbddc->dbg_flag) {
1159     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1160     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1161     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1162     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1163     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1164   }
1165   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1166     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1167   }
1168 
1169   /* Neumann */
1170   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1171   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1172   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1173   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1174   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1175   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1176   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1177   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1178   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1179   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1180   /* need to be adapted? */
1181   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1182   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1183   /* print info */
1184   if (pcbddc->dbg_flag) {
1185     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1186     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1187   }
1188   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1189     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1190   }
1191   /* free Neumann problem's matrix */
1192   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1198 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1199 {
1200   PetscErrorCode ierr;
1201   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1202 
1203   PetscFunctionBegin;
1204   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1205   if (pcbddc->local_auxmat1) {
1206     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1207     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1214 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1215 {
1216   PetscErrorCode ierr;
1217   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1218   PC_IS*            pcis = (PC_IS*)  (pc->data);
1219   const PetscScalar zero = 0.0;
1220 
1221   PetscFunctionBegin;
1222   /* Application of PHI^T (or PSI^T)  */
1223   if (pcbddc->coarse_psi_B) {
1224     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1225     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1226   } else {
1227     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1228     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1229   }
1230   /* Scatter data of coarse_rhs */
1231   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1232   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233 
1234   /* Local solution on R nodes */
1235   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1236   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   if (pcbddc->switch_static) {
1239     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1240     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1241   }
1242   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1243   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1244   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246   if (pcbddc->switch_static) {
1247     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249   }
1250 
1251   /* Coarse solution */
1252   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1254     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1255   }
1256   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1257   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258 
1259   /* Sum contributions from two levels */
1260   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1261   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1267 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1268 {
1269   PetscErrorCode ierr;
1270   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1271 
1272   PetscFunctionBegin;
1273   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1279 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1280 {
1281   PetscErrorCode ierr;
1282   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1283 
1284   PetscFunctionBegin;
1285   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /* uncomment for testing purposes */
1290 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1291 #undef __FUNCT__
1292 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1293 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1294 {
1295   PetscErrorCode    ierr;
1296   PC_IS*            pcis = (PC_IS*)(pc->data);
1297   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1298   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1299   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1300   MatType           impMatType=MATSEQAIJ;
1301   /* one and zero */
1302   PetscScalar       one=1.0,zero=0.0;
1303   /* space to store constraints and their local indices */
1304   PetscScalar       *temp_quadrature_constraint;
1305   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1306   /* iterators */
1307   PetscInt          i,j,k,total_counts,temp_start_ptr;
1308   /* stuff to store connected components stored in pcbddc->mat_graph */
1309   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1310   PetscInt          n_ISForFaces,n_ISForEdges;
1311   /* near null space stuff */
1312   MatNullSpace      nearnullsp;
1313   const Vec         *nearnullvecs;
1314   Vec               *localnearnullsp;
1315   PetscBool         nnsp_has_cnst;
1316   PetscInt          nnsp_size;
1317   PetscScalar       *array;
1318   /* BLAS integers */
1319   PetscBLASInt      lwork,lierr;
1320   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1321   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1322   /* LAPACK working arrays for SVD or POD */
1323   PetscBool         skip_lapack;
1324   PetscScalar       *work;
1325   PetscReal         *singular_vals;
1326 #if defined(PETSC_USE_COMPLEX)
1327   PetscReal         *rwork;
1328 #endif
1329 #if defined(PETSC_MISSING_LAPACK_GESVD)
1330   PetscBLASInt      Blas_one_2=1;
1331   PetscScalar       *temp_basis,*correlation_mat;
1332 #else
1333   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1334   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1335 #endif
1336   /* change of basis */
1337   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1338   PetscBool         boolforchange,qr_needed;
1339   PetscBT           touched,change_basis,qr_needed_idx;
1340   /* auxiliary stuff */
1341   PetscInt          *nnz,*is_indices,*local_to_B;
1342   /* some quantities */
1343   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1344   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1345 
1346 
1347   PetscFunctionBegin;
1348   /* Destroy Mat objects computed previously */
1349   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1350   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1351   /* Get index sets for faces, edges and vertices from graph */
1352   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1353     pcbddc->use_vertices = PETSC_TRUE;
1354   }
1355   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1356   /* HACK: provide functions to set change of basis */
1357   if (!ISForVertices && pcbddc->NullSpace) {
1358     pcbddc->use_change_of_basis = PETSC_TRUE;
1359     pcbddc->use_change_on_faces = PETSC_FALSE;
1360   }
1361   /* print some info */
1362   if (pcbddc->dbg_flag) {
1363     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1364     i = 0;
1365     if (ISForVertices) {
1366       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1367     }
1368     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1369     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1370     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1371     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1372   }
1373   /* check if near null space is attached to global mat */
1374   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1375   if (nearnullsp) {
1376     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1377     /* remove any stored info */
1378     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1379     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1380     /* store information for BDDC solver reuse */
1381     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1382     pcbddc->onearnullspace = nearnullsp;
1383     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1384     for (i=0;i<nnsp_size;i++) {
1385       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1386     }
1387   } else { /* if near null space is not provided BDDC uses constants by default */
1388     nnsp_size = 0;
1389     nnsp_has_cnst = PETSC_TRUE;
1390   }
1391   /* get max number of constraints on a single cc */
1392   max_constraints = nnsp_size;
1393   if (nnsp_has_cnst) max_constraints++;
1394 
1395   /*
1396        Evaluate maximum storage size needed by the procedure
1397        - temp_indices will contain start index of each constraint stored as follows
1398        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1399        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1400        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1401                                                                                                                                                          */
1402   total_counts = n_ISForFaces+n_ISForEdges;
1403   total_counts *= max_constraints;
1404   n_vertices = 0;
1405   if (ISForVertices) {
1406     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1407   }
1408   total_counts += n_vertices;
1409   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1410   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1411   total_counts = 0;
1412   max_size_of_constraint = 0;
1413   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1414     if (i<n_ISForEdges) {
1415       used_IS = &ISForEdges[i];
1416     } else {
1417       used_IS = &ISForFaces[i-n_ISForEdges];
1418     }
1419     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1420     total_counts += j;
1421     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1422   }
1423   total_counts *= max_constraints;
1424   total_counts += n_vertices;
1425   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1426   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1427   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1428   /* local to boundary numbering */
1429   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1430   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1431   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1432   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1433   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1434   /* get local part of global near null space vectors */
1435   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1436   for (k=0;k<nnsp_size;k++) {
1437     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1438     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1439     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1440   }
1441 
1442   /* whether or not to skip lapack calls */
1443   skip_lapack = PETSC_TRUE;
1444   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1445 
1446   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1447   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1448     PetscScalar temp_work;
1449 #if defined(PETSC_MISSING_LAPACK_GESVD)
1450     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1451     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1452     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1453     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1454 #if defined(PETSC_USE_COMPLEX)
1455     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1456 #endif
1457     /* now we evaluate the optimal workspace using query with lwork=-1 */
1458     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1459     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1460     lwork = -1;
1461     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1462 #if !defined(PETSC_USE_COMPLEX)
1463     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1464 #else
1465     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1466 #endif
1467     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1468     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1469 #else /* on missing GESVD */
1470     /* SVD */
1471     PetscInt max_n,min_n;
1472     max_n = max_size_of_constraint;
1473     min_n = max_constraints;
1474     if (max_size_of_constraint < max_constraints) {
1475       min_n = max_size_of_constraint;
1476       max_n = max_constraints;
1477     }
1478     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1479 #if defined(PETSC_USE_COMPLEX)
1480     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1481 #endif
1482     /* now we evaluate the optimal workspace using query with lwork=-1 */
1483     lwork = -1;
1484     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1485     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1486     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1487     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1488 #if !defined(PETSC_USE_COMPLEX)
1489     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1490 #else
1491     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1492 #endif
1493     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1494     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1495 #endif /* on missing GESVD */
1496     /* Allocate optimal workspace */
1497     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1498     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1499   }
1500   /* Now we can loop on constraining sets */
1501   total_counts = 0;
1502   temp_indices[0] = 0;
1503   /* vertices */
1504   if (ISForVertices) {
1505     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1506     if (nnsp_has_cnst) { /* consider all vertices */
1507       for (i=0;i<n_vertices;i++) {
1508         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1509         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1510         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1511         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1512         total_counts++;
1513       }
1514     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1515       PetscBool used_vertex;
1516       for (i=0;i<n_vertices;i++) {
1517         used_vertex = PETSC_FALSE;
1518         k = 0;
1519         while (!used_vertex && k<nnsp_size) {
1520           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1521           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1522             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1523             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1524             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1525             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1526             total_counts++;
1527             used_vertex = PETSC_TRUE;
1528           }
1529           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1530           k++;
1531         }
1532       }
1533     }
1534     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1535     n_vertices = total_counts;
1536   }
1537 
1538   /* edges and faces */
1539   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1540     if (i<n_ISForEdges) {
1541       used_IS = &ISForEdges[i];
1542       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1543     } else {
1544       used_IS = &ISForFaces[i-n_ISForEdges];
1545       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1546     }
1547     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1548     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1549     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1550     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1551     /* change of basis should not be performed on local periodic nodes */
1552     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1553     if (nnsp_has_cnst) {
1554       PetscScalar quad_value;
1555       temp_constraints++;
1556       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1557       for (j=0;j<size_of_constraint;j++) {
1558         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1559         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1560         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1561       }
1562       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1563       total_counts++;
1564     }
1565     for (k=0;k<nnsp_size;k++) {
1566       PetscReal real_value;
1567       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1568       for (j=0;j<size_of_constraint;j++) {
1569         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1570         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1571         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1572       }
1573       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1574       /* check if array is null on the connected component */
1575       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1576       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1577       if (real_value > 0.0) { /* keep indices and values */
1578         temp_constraints++;
1579         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1580         total_counts++;
1581       }
1582     }
1583     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1584     valid_constraints = temp_constraints;
1585     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1586     if (!pcbddc->use_nnsp_true && temp_constraints) {
1587       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1588 
1589 #if defined(PETSC_MISSING_LAPACK_GESVD)
1590       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1591          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1592          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1593             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1594             from that computed using LAPACKgesvd
1595          -> This is due to a different computation of eigenvectors in LAPACKheev
1596          -> The quality of the POD-computed basis will be the same */
1597       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1598       /* Store upper triangular part of correlation matrix */
1599       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1600       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1601       for (j=0;j<temp_constraints;j++) {
1602         for (k=0;k<j+1;k++) {
1603           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1604         }
1605       }
1606       /* compute eigenvalues and eigenvectors of correlation matrix */
1607       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1608       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1609 #if !defined(PETSC_USE_COMPLEX)
1610       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1611 #else
1612       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1613 #endif
1614       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1615       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1616       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1617       j=0;
1618       while (j < temp_constraints && singular_vals[j] < tol) j++;
1619       total_counts=total_counts-j;
1620       valid_constraints = temp_constraints-j;
1621       /* scale and copy POD basis into used quadrature memory */
1622       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1623       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1624       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1625       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1626       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1627       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1628       if (j<temp_constraints) {
1629         PetscInt ii;
1630         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1631         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1632         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1633         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1634         for (k=0;k<temp_constraints-j;k++) {
1635           for (ii=0;ii<size_of_constraint;ii++) {
1636             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1637           }
1638         }
1639       }
1640 #else  /* on missing GESVD */
1641       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1642       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1643       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1644       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1645 #if !defined(PETSC_USE_COMPLEX)
1646       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1647 #else
1648       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1649 #endif
1650       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1651       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1652       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1653       k = temp_constraints;
1654       if (k > size_of_constraint) k = size_of_constraint;
1655       j = 0;
1656       while (j < k && singular_vals[k-j-1] < tol) j++;
1657       total_counts = total_counts-temp_constraints+k-j;
1658       valid_constraints = k-j;
1659 #endif /* on missing GESVD */
1660     }
1661     /* setting change_of_basis flag is safe now */
1662     if (boolforchange) {
1663       for (j=0;j<valid_constraints;j++) {
1664         PetscBTSet(change_basis,total_counts-j-1);
1665       }
1666     }
1667   }
1668   /* free index sets of faces, edges and vertices */
1669   for (i=0;i<n_ISForFaces;i++) {
1670     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1671   }
1672   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1673   for (i=0;i<n_ISForEdges;i++) {
1674     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1675   }
1676   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1677   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1678 
1679   /* free workspace */
1680   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1681     ierr = PetscFree(work);CHKERRQ(ierr);
1682 #if defined(PETSC_USE_COMPLEX)
1683     ierr = PetscFree(rwork);CHKERRQ(ierr);
1684 #endif
1685     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1686 #if defined(PETSC_MISSING_LAPACK_GESVD)
1687     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1688     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1689 #endif
1690   }
1691   for (k=0;k<nnsp_size;k++) {
1692     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1693   }
1694   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1695 
1696   /* set quantities in pcbddc data structure */
1697   /* n_vertices defines the number of subdomain corners in the primal space */
1698   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1699   pcbddc->local_primal_size = total_counts;
1700   pcbddc->n_vertices = n_vertices;
1701   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1702 
1703   /* Create constraint matrix */
1704   /* The constraint matrix is used to compute the l2g map of primal dofs */
1705   /* so we need to set it up properly either with or without change of basis */
1706   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1707   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1708   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1709   /* array to compute a local numbering of constraints : vertices first then constraints */
1710   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1711   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1712   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1713   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1714   /* auxiliary stuff for basis change */
1715   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1716   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1717 
1718   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1719   total_primal_vertices=0;
1720   for (i=0;i<pcbddc->local_primal_size;i++) {
1721     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1722     if (size_of_constraint == 1) {
1723       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1724       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1725       aux_primal_minloc[total_primal_vertices]=0;
1726       total_primal_vertices++;
1727     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1728       PetscInt min_loc,min_index;
1729       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1730       /* find first untouched local node */
1731       k = 0;
1732       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1733       min_index = global_indices[k];
1734       min_loc = k;
1735       /* search the minimum among global nodes already untouched on the cc */
1736       for (k=1;k<size_of_constraint;k++) {
1737         /* there can be more than one constraint on a single connected component */
1738         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1739           min_index = global_indices[k];
1740           min_loc = k;
1741         }
1742       }
1743       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1744       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1745       aux_primal_minloc[total_primal_vertices]=min_loc;
1746       total_primal_vertices++;
1747     }
1748   }
1749   /* determine if a QR strategy is needed for change of basis */
1750   qr_needed = PETSC_FALSE;
1751   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1752   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1753     if (PetscBTLookup(change_basis,i)) {
1754       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1755       j = 0;
1756       for (k=0;k<size_of_constraint;k++) {
1757         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1758           j++;
1759         }
1760       }
1761       /* found more than one primal dof on the cc */
1762       if (j > 1) {
1763         PetscBTSet(qr_needed_idx,i);
1764         qr_needed = PETSC_TRUE;
1765       }
1766     }
1767   }
1768   /* free workspace */
1769   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1770 
1771   /* permute indices in order to have a sorted set of vertices */
1772   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1773 
1774   /* nonzero structure of constraint matrix */
1775   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1776   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1777   j=total_primal_vertices;
1778   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1779     if (!PetscBTLookup(change_basis,i)) {
1780       nnz[j]=temp_indices[i+1]-temp_indices[i];
1781       j++;
1782     }
1783   }
1784   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1785   ierr = PetscFree(nnz);CHKERRQ(ierr);
1786   /* set values in constraint matrix */
1787   for (i=0;i<total_primal_vertices;i++) {
1788     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1789   }
1790   total_counts = total_primal_vertices;
1791   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1792     if (!PetscBTLookup(change_basis,i)) {
1793       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1794       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1795       total_counts++;
1796     }
1797   }
1798   /* assembling */
1799   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1800   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1801   /*
1802   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1803   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1804   */
1805   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1806   if (pcbddc->use_change_of_basis) {
1807     /* dual and primal dofs on a single cc */
1808     PetscInt     dual_dofs,primal_dofs;
1809     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1810     PetscInt     primal_counter;
1811     /* working stuff for GEQRF */
1812     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1813     PetscBLASInt lqr_work;
1814     /* working stuff for UNGQR */
1815     PetscScalar  *gqr_work,lgqr_work_t;
1816     PetscBLASInt lgqr_work;
1817     /* working stuff for TRTRS */
1818     PetscScalar  *trs_rhs;
1819     PetscBLASInt Blas_NRHS;
1820     /* pointers for values insertion into change of basis matrix */
1821     PetscInt     *start_rows,*start_cols;
1822     PetscScalar  *start_vals;
1823     /* working stuff for values insertion */
1824     PetscBT      is_primal;
1825 
1826     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1827     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1828     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1829     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1830     /* work arrays */
1831     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1832     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1833     /* nonzeros per row */
1834     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1835       if (PetscBTLookup(change_basis,i)) {
1836         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1837         if (PetscBTLookup(qr_needed_idx,i)) {
1838           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1839         } else {
1840           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1841           /* get local primal index on the cc */
1842           j = 0;
1843           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1844           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1845         }
1846       }
1847     }
1848     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1849     ierr = PetscFree(nnz);CHKERRQ(ierr);
1850     /* Set initial identity in the matrix */
1851     for (i=0;i<pcis->n_B;i++) {
1852       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1853     }
1854 
1855     if (pcbddc->dbg_flag) {
1856       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1857       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1858     }
1859 
1860 
1861     /* Now we loop on the constraints which need a change of basis */
1862     /*
1863        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1864        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1865 
1866        Basic blocks of change of basis matrix T computed
1867 
1868           - Using the following block transformation if there is only a primal dof on the cc
1869             (in the example, primal dof is the last one of the edge in LOCAL ordering
1870              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1871             | 1        0   ...        0     1 |
1872             | 0        1   ...        0     1 |
1873             |              ...                |
1874             | 0        ...            1     1 |
1875             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1876 
1877           - via QR decomposition of constraints otherwise
1878     */
1879     if (qr_needed) {
1880       /* space to store Q */
1881       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1882       /* first we issue queries for optimal work */
1883       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1884       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1885       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1886       lqr_work = -1;
1887       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1888       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1889       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1890       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1891       lgqr_work = -1;
1892       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1893       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1894       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1895       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1896       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1897       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1898       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1899       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1900       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1901       /* array to store scaling factors for reflectors */
1902       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1903       /* array to store rhs and solution of triangular solver */
1904       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1905       /* allocating workspace for check */
1906       if (pcbddc->dbg_flag) {
1907         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1908       }
1909     }
1910     /* array to store whether a node is primal or not */
1911     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1912     for (i=0;i<total_primal_vertices;i++) {
1913       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1914     }
1915 
1916     /* loop on constraints and see whether or not they need a change of basis and compute it */
1917     /* -> using implicit ordering contained in temp_indices data */
1918     total_counts = pcbddc->n_vertices;
1919     primal_counter = total_counts;
1920     while (total_counts<pcbddc->local_primal_size) {
1921       primal_dofs = 1;
1922       if (PetscBTLookup(change_basis,total_counts)) {
1923         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1924         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1925           primal_dofs++;
1926         }
1927         /* get constraint info */
1928         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1929         dual_dofs = size_of_constraint-primal_dofs;
1930 
1931         if (pcbddc->dbg_flag) {
1932           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1933         }
1934 
1935         if (primal_dofs > 1) { /* QR */
1936 
1937           /* copy quadrature constraints for change of basis check */
1938           if (pcbddc->dbg_flag) {
1939             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1940           }
1941           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1942           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1943 
1944           /* compute QR decomposition of constraints */
1945           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1946           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1947           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1948           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1949           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1950           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1951           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1952 
1953           /* explictly compute R^-T */
1954           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1955           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1956           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1957           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1958           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1959           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1960           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1961           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1962           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1963           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1964 
1965           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1966           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1967           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1968           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1969           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1970           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1971           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1972           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1973           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1974 
1975           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1976              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1977              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1978           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1979           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1980           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1981           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1982           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1983           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1984           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1985           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1986           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1987           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1988 
1989           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1990           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1991           /* insert cols for primal dofs */
1992           for (j=0;j<primal_dofs;j++) {
1993             start_vals = &qr_basis[j*size_of_constraint];
1994             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1995             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1996           }
1997           /* insert cols for dual dofs */
1998           for (j=0,k=0;j<dual_dofs;k++) {
1999             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2000               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2001               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2002               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2003               j++;
2004             }
2005           }
2006 
2007           /* check change of basis */
2008           if (pcbddc->dbg_flag) {
2009             PetscInt   ii,jj;
2010             PetscBool valid_qr=PETSC_TRUE;
2011             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2012             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2013             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2014             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2015             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2016             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2017             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2018             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2019             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2020             for (jj=0;jj<size_of_constraint;jj++) {
2021               for (ii=0;ii<primal_dofs;ii++) {
2022                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2023                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2024               }
2025             }
2026             if (!valid_qr) {
2027               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2028               for (jj=0;jj<size_of_constraint;jj++) {
2029                 for (ii=0;ii<primal_dofs;ii++) {
2030                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2031                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2032                   }
2033                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2034                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2035                   }
2036                 }
2037               }
2038             } else {
2039               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2040             }
2041           }
2042         } else { /* simple transformation block */
2043           PetscInt row,col;
2044           PetscScalar val;
2045           for (j=0;j<size_of_constraint;j++) {
2046             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2047             if (!PetscBTLookup(is_primal,row)) {
2048               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2049               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2050               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2051             } else {
2052               for (k=0;k<size_of_constraint;k++) {
2053                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2054                 if (row != col) {
2055                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2056                 } else {
2057                   val = 1.0;
2058                 }
2059                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2060               }
2061             }
2062           }
2063         }
2064         /* increment primal counter */
2065         primal_counter += primal_dofs;
2066       } else {
2067         if (pcbddc->dbg_flag) {
2068           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2069         }
2070       }
2071       /* increment constraint counter total_counts */
2072       total_counts += primal_dofs;
2073     }
2074 
2075     /* free workspace */
2076     if (qr_needed) {
2077       if (pcbddc->dbg_flag) {
2078         ierr = PetscFree(work);CHKERRQ(ierr);
2079       }
2080       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2081       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2082       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2083       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2084       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2085     }
2086     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2087     /* assembling */
2088     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090     /*
2091     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2092     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2093     */
2094   }
2095 
2096   /* flush dbg viewer */
2097   if (pcbddc->dbg_flag) {
2098     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2099   }
2100 
2101   /* free workspace */
2102   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2103   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2104   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2105   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2106   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2107   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2108   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2109   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2110   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
2111   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2117 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2118 {
2119   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2120   PC_IS       *pcis = (PC_IS*)pc->data;
2121   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2122   PetscInt    bs,ierr,i,vertex_size;
2123   PetscViewer viewer=pcbddc->dbg_viewer;
2124 
2125   PetscFunctionBegin;
2126   /* Reset previously computed graph */
2127   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2128   /* Init local Graph struct */
2129   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2130 
2131   /* Check validity of the csr graph passed in by the user */
2132   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2133     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2134   }
2135 
2136   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2137   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2138     Mat mat_adj;
2139     const PetscInt *xadj,*adjncy;
2140     PetscBool flg_row=PETSC_TRUE;
2141 
2142     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2143     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2144     if (!flg_row) {
2145       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2146     }
2147     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2148     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2149     if (!flg_row) {
2150       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2151     }
2152     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2153   }
2154 
2155   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2156   vertex_size = 1;
2157   if (!pcbddc->user_provided_isfordofs) {
2158     if (!pcbddc->n_ISForDofs) {
2159       IS *custom_ISForDofs;
2160 
2161       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2162       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2163       for (i=0;i<bs;i++) {
2164         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2165       }
2166       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2167       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2168       /* remove my references to IS objects */
2169       for (i=0;i<bs;i++) {
2170         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2171       }
2172       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2173     }
2174   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2175     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2176   }
2177 
2178   /* Setup of Graph */
2179   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2180 
2181   /* Graph's connected components analysis */
2182   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2183 
2184   /* print some info to stdout */
2185   if (pcbddc->dbg_flag) {
2186     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2187   }
2188 
2189   /* mark topography has done */
2190   pcbddc->recompute_topography = PETSC_FALSE;
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2196 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2197 {
2198   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2199   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   n = 0;
2204   vertices = 0;
2205   if (pcbddc->ConstraintMatrix) {
2206     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2207     for (i=0;i<local_primal_size;i++) {
2208       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2209       if (size_of_constraint == 1) n++;
2210       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2211     }
2212     if (vertices_idx) {
2213       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2214       n = 0;
2215       for (i=0;i<local_primal_size;i++) {
2216         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2217         if (size_of_constraint == 1) {
2218           vertices[n++]=row_cmat_indices[0];
2219         }
2220         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2221       }
2222     }
2223   }
2224   *n_vertices = n;
2225   if (vertices_idx) *vertices_idx = vertices;
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2231 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2232 {
2233   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2234   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2235   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2236   PetscBT        touched;
2237   PetscErrorCode ierr;
2238 
2239     /* This function assumes that the number of local constraints per connected component
2240        is not greater than the number of nodes defined for the connected component
2241        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2242   PetscFunctionBegin;
2243   n = 0;
2244   constraints_index = 0;
2245   if (pcbddc->ConstraintMatrix) {
2246     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2247     max_size_of_constraint = 0;
2248     for (i=0;i<local_primal_size;i++) {
2249       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2250       if (size_of_constraint > 1) {
2251         n++;
2252       }
2253       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2254       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2255     }
2256     if (constraints_idx) {
2257       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2258       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2259       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2260       n = 0;
2261       for (i=0;i<local_primal_size;i++) {
2262         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2263         if (size_of_constraint > 1) {
2264           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2265           /* find first untouched local node */
2266           j = 0;
2267           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2268           min_index = row_cmat_global_indices[j];
2269           min_loc = j;
2270           /* search the minimum among nodes not yet touched on the connected component
2271              since there can be more than one constraint on a single cc */
2272           for (j=1;j<size_of_constraint;j++) {
2273             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2274               min_index = row_cmat_global_indices[j];
2275               min_loc = j;
2276             }
2277           }
2278           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2279           constraints_index[n++] = row_cmat_indices[min_loc];
2280         }
2281         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2282       }
2283       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2284       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2285     }
2286   }
2287   *n_constraints = n;
2288   if (constraints_idx) *constraints_idx = constraints_index;
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 /* the next two functions has been adapted from pcis.c */
2293 #undef __FUNCT__
2294 #define __FUNCT__ "PCBDDCApplySchur"
2295 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2296 {
2297   PetscErrorCode ierr;
2298   PC_IS          *pcis = (PC_IS*)(pc->data);
2299 
2300   PetscFunctionBegin;
2301   if (!vec2_B) { vec2_B = v; }
2302   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2303   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2304   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2305   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2306   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2312 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2313 {
2314   PetscErrorCode ierr;
2315   PC_IS          *pcis = (PC_IS*)(pc->data);
2316 
2317   PetscFunctionBegin;
2318   if (!vec2_B) { vec2_B = v; }
2319   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2320   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2321   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2322   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2323   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 #undef __FUNCT__
2328 #define __FUNCT__ "PCBDDCSubsetNumbering"
2329 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2330 {
2331   Vec            local_vec,global_vec;
2332   IS             seqis,paris;
2333   VecScatter     scatter_ctx;
2334   PetscScalar    *array;
2335   PetscInt       *temp_global_dofs;
2336   PetscScalar    globalsum;
2337   PetscInt       i,j,s;
2338   PetscInt       nlocals,first_index,old_index,max_local;
2339   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2340   PetscMPIInt    *dof_sizes,*dof_displs;
2341   PetscBool      first_found;
2342   PetscErrorCode ierr;
2343 
2344   PetscFunctionBegin;
2345   /* mpi buffers */
2346   MPI_Comm_size(comm,&size_prec_comm);
2347   MPI_Comm_rank(comm,&rank_prec_comm);
2348   j = ( !rank_prec_comm ? size_prec_comm : 0);
2349   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2350   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2351   /* get maximum size of subset */
2352   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2353   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2354   max_local = 0;
2355   if (n_local_dofs) {
2356     max_local = temp_global_dofs[0];
2357     for (i=1;i<n_local_dofs;i++) {
2358       if (max_local < temp_global_dofs[i] ) {
2359         max_local = temp_global_dofs[i];
2360       }
2361     }
2362   }
2363   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2364   max_global++;
2365   max_local = 0;
2366   if (n_local_dofs) {
2367     max_local = local_dofs[0];
2368     for (i=1;i<n_local_dofs;i++) {
2369       if (max_local < local_dofs[i] ) {
2370         max_local = local_dofs[i];
2371       }
2372     }
2373   }
2374   max_local++;
2375   /* allocate workspace */
2376   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2377   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2378   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2379   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2380   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2381   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2382   /* create scatter */
2383   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2384   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2385   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2386   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2387   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2388   /* init array */
2389   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2390   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2391   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2392   if (local_dofs_mult) {
2393     for (i=0;i<n_local_dofs;i++) {
2394       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2395     }
2396   } else {
2397     for (i=0;i<n_local_dofs;i++) {
2398       array[local_dofs[i]]=1.0;
2399     }
2400   }
2401   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2402   /* scatter into global vec and get total number of global dofs */
2403   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2404   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2405   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2406   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2407   /* Fill global_vec with cumulative function for global numbering */
2408   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2409   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2410   nlocals = 0;
2411   first_index = -1;
2412   first_found = PETSC_FALSE;
2413   for (i=0;i<s;i++) {
2414     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2415       first_found = PETSC_TRUE;
2416       first_index = i;
2417     }
2418     nlocals += (PetscInt)PetscRealPart(array[i]);
2419   }
2420   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2421   if (!rank_prec_comm) {
2422     dof_displs[0]=0;
2423     for (i=1;i<size_prec_comm;i++) {
2424       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2425     }
2426   }
2427   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2428   if (first_found) {
2429     array[first_index] += (PetscScalar)nlocals;
2430     old_index = first_index;
2431     for (i=first_index+1;i<s;i++) {
2432       if (PetscRealPart(array[i]) > 0.0) {
2433         array[i] += array[old_index];
2434         old_index = i;
2435       }
2436     }
2437   }
2438   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2439   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2440   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2441   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2442   /* get global ordering of local dofs */
2443   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2444   if (local_dofs_mult) {
2445     for (i=0;i<n_local_dofs;i++) {
2446       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2447     }
2448   } else {
2449     for (i=0;i<n_local_dofs;i++) {
2450       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2451     }
2452   }
2453   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2454   /* free workspace */
2455   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2456   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2457   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2458   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2459   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2460   /* return pointer to global ordering of local dofs */
2461   *global_numbering_subset = temp_global_dofs;
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2467 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2468 {
2469   PetscInt       i,j;
2470   PetscScalar    *alphas;
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   /* this implements stabilized Gram-Schmidt */
2475   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2476   for (i=0;i<n;i++) {
2477     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2478     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2479     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2480   }
2481   ierr = PetscFree(alphas);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatConvert_IS_AIJ"
2488 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2489 {
2490   Mat new_mat;
2491   Mat_IS *matis = (Mat_IS*)(mat->data);
2492   /* info on mat */
2493   PetscInt bs,rows,cols;
2494   PetscInt lrows,lcols;
2495   PetscInt local_rows,local_cols;
2496   PetscMPIInt nsubdomains;
2497   /* preallocation */
2498   Vec vec_dnz,vec_onz;
2499   PetscScalar *my_dnz,*my_onz,*array;
2500   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2501   PetscInt  index_row,index_col,owner;
2502   PetscInt  *local_indices,*global_indices;
2503   /* work */
2504   PetscInt i,j;
2505   PetscErrorCode ierr;
2506 
2507   PetscFunctionBegin;
2508   /* CHECKS */
2509   /* get info from mat */
2510   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2511   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2512   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2513   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2514 
2515   /* MAT_INITIAL_MATRIX */
2516   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2517   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2518   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2519   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2520   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2521   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2522 
2523   /*
2524     preallocation
2525   */
2526 
2527   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2528   /*
2529      Some vectors are needed to sum up properly on shared interface dofs.
2530      Note that preallocation is not exact, since it overestimates nonzeros
2531   */
2532 /*
2533   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2534   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2535   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2536   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2537 */
2538   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2539   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2540   /* All processes need to compute entire row ownership */
2541   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2542   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2543   for (i=0;i<nsubdomains;i++) {
2544     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2545       row_ownership[j]=i;
2546     }
2547   }
2548   /* map indices of local mat to global values */
2549   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2550   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2551   for (i=0;i<local_rows;i++) local_indices[i]=i;
2552   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2553   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2554 
2555   /*
2556      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2557      then, they will be summed up properly. This way, preallocation is always sufficient
2558   */
2559   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2560   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2561   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2562   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2563   for (i=0;i<local_rows;i++) {
2564     index_row = global_indices[i];
2565     for (j=i;j<local_rows;j++) {
2566       owner = row_ownership[index_row];
2567       index_col = global_indices[j];
2568       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2569         my_dnz[i] += 1.0;
2570       } else { /* offdiag block */
2571         my_onz[i] += 1.0;
2572       }
2573       /* same as before, interchanging rows and cols */
2574       if (i != j) {
2575         owner = row_ownership[index_col];
2576         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2577           my_dnz[j] += 1.0;
2578         } else {
2579           my_onz[j] += 1.0;
2580         }
2581       }
2582     }
2583   }
2584   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2585   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2586   if (local_rows) { /* multilevel guard */
2587     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2588     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2589   }
2590   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2591   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2592   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2593   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2594   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2595   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2596   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2597 
2598   /* set computed preallocation in dnz and onz */
2599   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2600   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2601   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2602   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2603   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2604   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2605   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2606   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2607 
2608   /* Resize preallocation if overestimated */
2609   for (i=0;i<lrows;i++) {
2610     dnz[i] = PetscMin(dnz[i],lcols);
2611     onz[i] = PetscMin(onz[i],cols-lcols);
2612   }
2613   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2614   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2615 
2616   /*
2617     Set values. Very Basic.
2618   */
2619   for (i=0;i<local_rows;i++) {
2620     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2621     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2622     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2623     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2624     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2625   }
2626   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2627   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2628   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629 
2630   /* get back mat */
2631   *M = new_mat;
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatISSubassemble_Private"
2637 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2638 {
2639   Mat             subdomain_adj;
2640   IS              new_ranks,ranks_send_to;
2641   MatPartitioning partitioner;
2642   Mat_IS          *matis;
2643   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2644   PetscInt        prank;
2645   PetscMPIInt     size,rank,color;
2646   PetscInt        *xadj,*adjncy,*oldranks;
2647   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2648   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2649   PetscErrorCode  ierr;
2650   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2651   PetscSubcomm    subcomm;
2652 
2653   PetscFunctionBegin;
2654   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2655   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2656   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2657 
2658   /* Get info on mapping */
2659   matis = (Mat_IS*)(mat->data);
2660   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2661   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2662 
2663   /* build local CSR graph of subdomains' connectivity */
2664   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2665   xadj[0] = 0;
2666   xadj[1] = PetscMax(n_neighs-1,0);
2667   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2668   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2669 
2670   if (threshold) {
2671     PetscInt* count,min_threshold;
2672     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2673     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2674     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2675       for (j=0;j<n_shared[i];j++) {
2676         count[shared[i][j]] += 1;
2677       }
2678     }
2679     /* adapt threshold since we dont want to lose significant connections */
2680     min_threshold = n_neighs;
2681     for (i=1;i<n_neighs;i++) {
2682       for (j=0;j<n_shared[i];j++) {
2683         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2684       }
2685     }
2686     threshold = PetscMax(min_threshold+1,threshold);
2687     xadj[1] = 0;
2688     for (i=1;i<n_neighs;i++) {
2689       for (j=0;j<n_shared[i];j++) {
2690         if (count[shared[i][j]] < threshold) {
2691           adjncy[xadj[1]] = neighs[i];
2692           adjncy_wgt[xadj[1]] = n_shared[i];
2693           xadj[1]++;
2694           break;
2695         }
2696       }
2697     }
2698     ierr = PetscFree(count);CHKERRQ(ierr);
2699   } else {
2700     if (xadj[1]) {
2701       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2702       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2703     }
2704   }
2705   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2706   if (use_square) {
2707     for (i=0;i<xadj[1];i++) {
2708       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2709     }
2710   }
2711   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2712 
2713   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2714 
2715   /*
2716     Restrict work on active processes only.
2717   */
2718   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2719   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2720   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2721   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2722   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2723   if (color) {
2724     ierr = PetscFree(xadj);CHKERRQ(ierr);
2725     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2726     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2727   } else {
2728     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2729     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2730     prank = rank;
2731     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2732     for (i=0;i<size;i++) {
2733       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2734     }
2735     for (i=0;i<xadj[1];i++) {
2736       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2737     }
2738     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2739     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2740     n_subdomains = (PetscInt)size;
2741     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2742 
2743     /* Partition */
2744     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2745     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2746     if (use_vwgt) {
2747       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2748       v_wgt[0] = local_size;
2749       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2750     }
2751     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2752     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2753     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2754     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2755     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2756 
2757     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2758     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2759     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2760     /* clean up */
2761     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2762     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2763     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2764     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2765   }
2766   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2767 
2768   /* assemble parallel IS for sends */
2769   i = 1;
2770   if (color) i=0;
2771   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2772   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2773 
2774   /* get back IS */
2775   *is_sends = ranks_send_to;
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatISSubassemble"
2783 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2784 {
2785   Mat                    local_mat,new_mat;
2786   Mat_IS                 *matis;
2787   IS                     is_sends_internal;
2788   PetscInt               rows,cols;
2789   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2790   PetscBool              ismatis,isdense;
2791   ISLocalToGlobalMapping l2gmap;
2792   PetscInt*              l2gmap_indices;
2793   const PetscInt*        is_indices;
2794   MatType                new_local_type;
2795   MatTypePrivate         new_local_type_private;
2796   /* buffers */
2797   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2798   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2799   /* MPI */
2800   MPI_Comm               comm;
2801   PetscMPIInt            n_sends,n_recvs,commsize;
2802   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2803   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2804   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2805   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2806   PetscErrorCode         ierr;
2807 
2808   PetscFunctionBegin;
2809   /* checks */
2810   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2811   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2812   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2813   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2814   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2815   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2816   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2817   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2818   PetscValidLogicalCollectiveInt(mat,bs,0);
2819   /* prepare IS for sending if not provided */
2820   if (!is_sends) {
2821     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2822     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2823   } else {
2824     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2825     is_sends_internal = is_sends;
2826   }
2827 
2828   /* get pointer of MATIS data */
2829   matis = (Mat_IS*)mat->data;
2830 
2831   /* get comm */
2832   comm = PetscObjectComm((PetscObject)mat);
2833 
2834   /* compute number of sends */
2835   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2836   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2837 
2838   /* compute number of receives */
2839   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2840   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2841   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2842   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2843   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2844   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2845   ierr = PetscFree(iflags);CHKERRQ(ierr);
2846 
2847   /* prepare send/receive buffers */
2848   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2849   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2850   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2851   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2852 
2853   /* Get data from local mat */
2854   if (!isdense) {
2855     /* TODO: See below some guidelines on how to prepare the local buffers */
2856     /*
2857        send_buffer_vals should contain the raw values of the local matrix
2858        send_buffer_idxs should contain:
2859        - MatType_PRIVATE type
2860        - PetscInt        size_of_l2gmap
2861        - PetscInt        global_row_indices[size_of_l2gmap]
2862        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2863     */
2864   } else {
2865     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2866     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2867     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2868     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2869     send_buffer_idxs[1] = i;
2870     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2871     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2872     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2873     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2874     for (i=0;i<n_sends;i++) {
2875       ilengths_vals[is_indices[i]] = len*len;
2876       ilengths_idxs[is_indices[i]] = len+2;
2877     }
2878   }
2879   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2880   buf_size_idxs = 0;
2881   buf_size_vals = 0;
2882   for (i=0;i<n_recvs;i++) {
2883     buf_size_idxs += (PetscInt)olengths_idxs[i];
2884     buf_size_vals += (PetscInt)olengths_vals[i];
2885   }
2886   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2887   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2888 
2889   /* get new tags for clean communications */
2890   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2891   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2892 
2893   /* allocate for requests */
2894   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2895   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2896   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2897   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2898 
2899   /* communications */
2900   ptr_idxs = recv_buffer_idxs;
2901   ptr_vals = recv_buffer_vals;
2902   for (i=0;i<n_recvs;i++) {
2903     source_dest = onodes[i];
2904     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2905     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2906     ptr_idxs += olengths_idxs[i];
2907     ptr_vals += olengths_vals[i];
2908   }
2909   for (i=0;i<n_sends;i++) {
2910     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2911     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2912     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2913   }
2914   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2915   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2916 
2917   /* assemble new l2g map */
2918   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2919   ptr_idxs = recv_buffer_idxs;
2920   buf_size_idxs = 0;
2921   for (i=0;i<n_recvs;i++) {
2922     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2923     ptr_idxs += olengths_idxs[i];
2924   }
2925   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2926   ptr_idxs = recv_buffer_idxs;
2927   buf_size_idxs = 0;
2928   for (i=0;i<n_recvs;i++) {
2929     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2930     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2931     ptr_idxs += olengths_idxs[i];
2932   }
2933   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2934   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2935   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2936 
2937   /* infer new local matrix type from received local matrices type */
2938   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2939   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2940   new_local_type_private = MATAIJ_PRIVATE;
2941   new_local_type = MATSEQAIJ;
2942   if (n_recvs) {
2943     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2944     ptr_idxs = recv_buffer_idxs;
2945     for (i=0;i<n_recvs;i++) {
2946       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2947         new_local_type_private = MATAIJ_PRIVATE;
2948         break;
2949       }
2950       ptr_idxs += olengths_idxs[i];
2951     }
2952     switch (new_local_type_private) {
2953       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2954         new_local_type = MATSEQAIJ;
2955         bs = 1;
2956         break;
2957       case MATAIJ_PRIVATE:
2958         new_local_type = MATSEQAIJ;
2959         bs = 1;
2960         break;
2961       case MATBAIJ_PRIVATE:
2962         new_local_type = MATSEQBAIJ;
2963         break;
2964       case MATSBAIJ_PRIVATE:
2965         new_local_type = MATSEQSBAIJ;
2966         break;
2967       default:
2968         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2969         break;
2970     }
2971   }
2972 
2973   /* create MATIS object */
2974   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2975   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2976   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2977   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2978   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2979   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2980 
2981   /* set values */
2982   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2983   ptr_vals = recv_buffer_vals;
2984   ptr_idxs = recv_buffer_idxs;
2985   for (i=0;i<n_recvs;i++) {
2986     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2987       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2988       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2989       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2990       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2991       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2992     }
2993     ptr_idxs += olengths_idxs[i];
2994     ptr_vals += olengths_vals[i];
2995   }
2996   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2998   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2999   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3000 
3001   { /* check */
3002     Vec       lvec,rvec;
3003     PetscReal infty_error;
3004 
3005     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3006     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3007     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3008     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3009     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3010     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3011     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3012     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3013     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3014   }
3015 
3016   /* free workspace */
3017   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3018   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3019   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3020   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3021   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3022   if (isdense) {
3023     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3024     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3025   } else {
3026     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3027   }
3028   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3029   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3030   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3031   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3032   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3033   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3034   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3035   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3036   ierr = PetscFree(onodes);CHKERRQ(ierr);
3037   /* get back new mat */
3038   *mat_n = new_mat;
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 #undef __FUNCT__
3043 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3044 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3045 {
3046   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3047   PC_IS                  *pcis = (PC_IS*)pc->data;
3048   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3049   MatNullSpace           CoarseNullSpace=NULL;
3050   ISLocalToGlobalMapping coarse_islg;
3051   IS                     coarse_is;
3052   PetscInt               max_it;
3053   PetscInt               im_active=-1,active_procs=-1;
3054   PC                     pc_temp;
3055   PCType                 coarse_pc_type;
3056   KSPType                coarse_ksp_type;
3057   PetscBool              multilevel_requested,multilevel_allowed;
3058   PetscBool              setsym,issym,isbddc,isnn;
3059   MatStructure           matstruct;
3060   PetscErrorCode         ierr;
3061 
3062   PetscFunctionBegin;
3063   /* Assign global numbering to coarse dofs */
3064   if (pcbddc->new_primal_space) { /* this is suboptimal -> we can chech when creating local primal indices */
3065     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3066     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3067     /* see if we can avoid some work */
3068     if (pcbddc->coarse_ksp) { /* already present */
3069       Mat tcoarse_mat;
3070       PetscInt ocoarse_size;
3071       ierr = KSPGetOperators(pcbddc->coarse_ksp,NULL,&tcoarse_mat,NULL);CHKERRQ(ierr);
3072       ierr = MatGetSize(tcoarse_mat,&ocoarse_size,NULL);CHKERRQ(ierr);
3073       if (ocoarse_size != pcbddc->coarse_size) { /* we need to destroy the KSP */
3074         ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
3075       }
3076     }
3077   }
3078 
3079   /* infer some info from user */
3080   issym = PETSC_FALSE;
3081   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3082   multilevel_allowed = PETSC_FALSE;
3083   multilevel_requested = PETSC_FALSE;
3084   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3085   if (multilevel_requested) {
3086     /* count "active processes" */
3087     im_active = !!(pcis->n);
3088     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3089     if (active_procs/pcbddc->coarsening_ratio < 2) {
3090       multilevel_allowed = PETSC_FALSE;
3091     } else {
3092       multilevel_allowed = PETSC_TRUE;
3093     }
3094   }
3095 
3096   /* set defaults for coarse KSP and PC */
3097   if (multilevel_allowed) {
3098     if (issym) {
3099       coarse_ksp_type = KSPRICHARDSON;
3100     } else {
3101       coarse_ksp_type = KSPCHEBYSHEV;
3102     }
3103     coarse_pc_type = PCBDDC;
3104   } else {
3105     coarse_ksp_type = KSPPREONLY;
3106     coarse_pc_type = PCREDUNDANT;
3107   }
3108 
3109   /* create the coarse KSP object only once with defaults */
3110   if (!pcbddc->coarse_ksp) {
3111     char prefix[256],str_level[3];
3112     size_t len;
3113     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3114     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3115     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3116     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3117     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3118     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3119     /* prefix */
3120     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3121     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3122     if (!pcbddc->current_level) {
3123       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3124       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3125     } else {
3126       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3127       if (pcbddc->current_level>1) len -= 2;
3128       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3129       *(prefix+len)='\0';
3130       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3131       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3132     }
3133     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3134   }
3135   /* allow user customization */
3136   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3137   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3138   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3139 
3140   /* get some info after set from options */
3141   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3142   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3143   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3144   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3145     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3146     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3147     isbddc = PETSC_FALSE;
3148   }
3149 
3150   /* propagate BDDC info to the next level */
3151   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3152   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3153   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3154 
3155   /* creates temporary MATIS object for coarse matrix */
3156   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3157   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3158   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3159   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3160   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3161   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3164   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3165 
3166   /* assemble coarse matrix */
3167   if (isbddc || isnn) {
3168     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3169   } else { /* need to provide reuse */
3170     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3171   }
3172   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3173 
3174   /* create local to global scatters for coarse problem */
3175   if (pcbddc->new_primal_space) {
3176     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3177     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3178     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3179     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3180     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3181   }
3182   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3183 
3184   /* propagate symmetry info to coarse matrix */
3185   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3186 
3187   /* Compute coarse null space (special handling by BDDC only) */
3188   if (pcbddc->NullSpace) {
3189     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3190     if (isbddc) {
3191       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3192     } else {
3193       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3194     }
3195   }
3196 
3197   /* set operators */
3198   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3199   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3200 
3201   /* additional KSP customization */
3202   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3203   if (max_it < 5) {
3204     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3205   }
3206   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3207 
3208 
3209   /* print some info if requested */
3210   if (pcbddc->dbg_flag) {
3211     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3212     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3213     if (!multilevel_allowed) {
3214       if (multilevel_requested) {
3215         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3216       } else if (pcbddc->max_levels) {
3217         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3218       }
3219     }
3220     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3221     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3222   }
3223 
3224   /* setup coarse ksp */
3225   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3226   if (pcbddc->dbg_flag) {
3227     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3228     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3229     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3230     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3231   }
3232 
3233   /* Check coarse problem if requested */
3234   if (pcbddc->dbg_flag) {
3235     KSP       check_ksp;
3236     KSPType   check_ksp_type;
3237     PC        check_pc;
3238     Vec       check_vec;
3239     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3240     PetscInt  its;
3241     PetscBool ispreonly,compute;
3242 
3243     /* Create ksp object suitable for estimation of extreme eigenvalues */
3244     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3245     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3246     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3247     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3248     if (ispreonly) {
3249       check_ksp_type = KSPPREONLY;
3250       compute = PETSC_FALSE;
3251     } else {
3252       if (issym) check_ksp_type = KSPCG;
3253       else check_ksp_type = KSPGMRES;
3254       compute = PETSC_TRUE;
3255     }
3256     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3257     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3258     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3259     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3260     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3261     /* create random vec */
3262     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3263     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3264     if (CoarseNullSpace) {
3265       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3266     }
3267     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3268     /* solve coarse problem */
3269     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3270     if (CoarseNullSpace) {
3271       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3272     }
3273     /* check coarse problem residual error */
3274     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3275     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3276     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3277     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3278     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3279     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3280     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3281     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3282     /* get eigenvalue estimation if preonly has not been requested */
3283     if (!ispreonly) {
3284       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3285       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3286       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3287     }
3288     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3289     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3290   }
3291   /* free memory */
3292   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3293   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3294   PetscFunctionReturn(0);
3295 }
3296 
3297 #undef __FUNCT__
3298 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3299 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3300 {
3301   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3302   PC_IS*         pcis = (PC_IS*)pc->data;
3303   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3304   PetscInt       i,j,coarse_size;
3305   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3306   PetscErrorCode ierr;
3307 
3308   PetscFunctionBegin;
3309   /* get indices in local ordering for vertices and constraints */
3310   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3311   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3312   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3313   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3314   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3315   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3316   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3317 
3318   /* Compute global number of coarse dofs */
3319   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3320 
3321   /* check numbering */
3322   if (pcbddc->dbg_flag) {
3323     PetscScalar coarsesum,*array;
3324 
3325     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3327     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3328     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3329     for (i=0;i<pcbddc->local_primal_size;i++) {
3330       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3331     }
3332     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3333     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3334     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3335     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3336     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3337     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3338     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3339     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3340     for (i=0;i<pcis->n;i++) {
3341       if (array[i] == 1.0) {
3342         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3343       }
3344     }
3345     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3346     for (i=0;i<pcis->n;i++) {
3347       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3348     }
3349     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3350     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3351     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3352     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3353     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3354     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3355     if (pcbddc->dbg_flag > 1) {
3356       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3357       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3358       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3359       for (i=0;i<pcbddc->local_primal_size;i++) {
3360         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3361       }
3362       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3363     }
3364     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3365   }
3366   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3367   /* get back data */
3368   *coarse_size_n = coarse_size;
3369   *local_primal_indices_n = local_primal_indices;
3370   PetscFunctionReturn(0);
3371 }
3372 
3373