xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 20a2ab83191f0e540d79dc30240b78df045b94f2)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCSetLevel"
47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
48 {
49   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   pcbddc->current_level=level;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCBDDCResetCustomization"
58 PetscErrorCode PCBDDCResetCustomization(PC pc)
59 {
60   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
61   PetscInt       i;
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
66   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
67   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
68   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
70   for (i=0;i<pcbddc->n_ISForDofs;i++) {
71     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "PCBDDCResetTopography"
79 PetscErrorCode PCBDDCResetTopography(PC pc)
80 {
81   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
86   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
87   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCBDDCResetSolvers"
93 PetscErrorCode PCBDDCResetSolvers(PC pc)
94 {
95   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
101   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
103   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
104   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
105   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
106   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
107   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
110   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
113   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
114   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
115   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
116   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "PCBDDCCreateWorkVectors"
122 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
123 {
124   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
125   PC_IS          *pcis = (PC_IS*)pc->data;
126   VecType        impVecType;
127   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
128   PetscErrorCode ierr;
129 
130   PetscFunctionBegin;
131   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
132   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
133   local_primal_size = n_constraints+n_vertices;
134   n_R = pcis->n-n_vertices;
135   /* local work vectors */
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
138   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
139   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
140   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
141   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
142   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
143   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
144   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
145   if (n_constraints) {
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
155 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
156 {
157   PetscErrorCode         ierr;
158   /* pointers to pcis and pcbddc */
159   PC_IS*                 pcis = (PC_IS*)pc->data;
160   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
161   /* submatrices of local problem */
162   Mat                    A_RV,A_VR,A_VV;
163   /* working matrices */
164   Mat                    M1,M2,M3,C_CR;
165   /* working vectors */
166   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
167   /* additional working stuff */
168   IS                     is_aux;
169   ISLocalToGlobalMapping BtoNmap;
170   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
171   const PetscScalar      *array,*row_cmat_values;
172   const PetscInt         *row_cmat_indices,*idx_R_local;
173   PetscInt               *vertices,*idx_V_B,*auxindices;
174   PetscInt               n_vertices,n_constraints,size_of_constraint;
175   PetscInt               i,j,n_R,n_D,n_B;
176   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
177   /* Vector and matrix types */
178   VecType                impVecType;
179   MatType                impMatType;
180   /* some shortcuts to scalars */
181   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
182   /* for debugging purposes */
183   PetscReal              *coarsefunctions_errors,*constraints_errors;
184 
185   PetscFunctionBegin;
186   /* get number of vertices and their local indices */
187   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
188   n_constraints = pcbddc->local_primal_size-n_vertices;
189   /* Set Non-overlapping dimensions */
190   n_B = pcis->n_B; n_D = pcis->n - n_B;
191   n_R = pcis->n-n_vertices;
192 
193   /* Set types for local objects needed by BDDC precondtioner */
194   impMatType = MATSEQDENSE;
195   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
196 
197   /* Allocating some extra storage just to be safe */
198   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
199   for (i=0;i<pcis->n;i++) auxindices[i]=i;
200 
201   /* vertices in boundary numbering */
202   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
203   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
204   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
205   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
206   if (i != n_vertices) {
207     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
208   }
209 
210   /* some work vectors on vertices and/or constraints */
211   if (n_vertices) {
212     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
213     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
214     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
215     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
216   }
217   if (n_constraints) {
218     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
219     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
220   }
221 
222   /* Precompute stuffs needed for preprocessing and application of BDDC*/
223   if (n_constraints) {
224     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
225     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
226     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
227     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
228 
229     /* Extract constraints on R nodes: C_{CR}  */
230     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
231     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
232     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
233 
234     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
235     for (i=0;i<n_constraints;i++) {
236       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
237       /* Get row of constraint matrix in R numbering */
238       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
239       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
240       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
241       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
242       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
243       /* Solve for row of constraint matrix in R numbering */
244       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
245       /* Set values in local_auxmat2 */
246       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
247       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
248       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
249     }
250     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
252     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
253 
254     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
255     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
256     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
257     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
258     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
259     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
260     ierr = MatSetUp(M1);CHKERRQ(ierr);
261     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
262     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
263     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
264     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
265     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
266     ierr = MatDestroy(&M2);CHKERRQ(ierr);
267     ierr = MatDestroy(&M3);CHKERRQ(ierr);
268     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
269     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
270   }
271 
272   /* Get submatrices from subdomain matrix */
273   if (n_vertices) {
274     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
275     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
277     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
278     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
279   }
280 
281   /* Matrix of coarse basis functions (local) */
282   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
283   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
284   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
285   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
286   if (pcbddc->switch_static || pcbddc->dbg_flag) {
287     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
288     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
289     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
290     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
291   }
292 
293   if (pcbddc->dbg_flag) {
294     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
295     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
296     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
297   }
298   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
299   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
300 
301   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
302 
303   /* vertices */
304   for (i=0;i<n_vertices;i++) {
305     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
306     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
307     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
308     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
309     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
310     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
311     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
312     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
313     if (n_constraints) {
314       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
315       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
316       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
317     }
318     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
319     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
320 
321     /* Set values in coarse basis function and subdomain part of coarse_mat */
322     /* coarse basis functions */
323     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
324     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
325     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
326     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
327     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
329     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
330     if (pcbddc->switch_static || pcbddc->dbg_flag) {
331       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
333       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
334       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
335       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
336     }
337     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
338     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
339     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
340     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
341     if (n_constraints) {
342       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
343       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
345     }
346 
347     /* check */
348     if (pcbddc->dbg_flag) {
349       /* assemble subdomain vector on local nodes */
350       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
351       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
352       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
354       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
355       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
357       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
358       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
359       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
360       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
361       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
362       if (n_constraints) {
363         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
364         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
365         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
366       }
367       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
368       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
369       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
370       /* check saddle point solution */
371       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
372       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
373       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
374       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
375       /* shift by the identity matrix */
376       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
377       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
380     }
381   }
382 
383   /* constraints */
384   for (i=0;i<n_constraints;i++) {
385     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
386     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
387     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
388     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
389     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
390     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
391     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
392     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
393     if (n_vertices) {
394       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
395     }
396     /* Set values in coarse basis function and subdomain part of coarse_mat */
397     /* coarse basis functions */
398     j = i+n_vertices; /* don't touch this! */
399     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
400     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
404     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
405     if (pcbddc->switch_static || pcbddc->dbg_flag) {
406       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
409       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
410       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
411     }
412     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
413     if (n_vertices) {
414       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
417     }
418     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
420     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
421 
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
430       /* assemble subdomain vector of lagrange multipliers */
431       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
432       if (n_vertices) {
433         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       }
437       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
438       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
439       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
440       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
443       /* check saddle point solution */
444       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
447       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
448       /* shift by the identity matrix */
449       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
450       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
453     }
454   }
455   /* call assembling routines for local coarse basis */
456   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   if (pcbddc->switch_static || pcbddc->dbg_flag) {
459     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
461   }
462 
463   /* compute other basis functions for non-symmetric problems */
464   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
465   if (!setsym || (setsym && !issym)) {
466     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
467     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
468     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
469     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
470     if (pcbddc->switch_static || pcbddc->dbg_flag) {
471       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
472       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
473       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
474       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
475     }
476     for (i=0;i<pcbddc->local_primal_size;i++) {
477       if (n_constraints) {
478         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
479         for (j=0;j<n_constraints;j++) {
480           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
481         }
482         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
483         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
484       }
485       if (i<n_vertices) {
486         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
487         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
488         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
489         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
490         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
491         if (n_constraints) {
492           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
493         }
494       } else {
495         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
496       }
497       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
498       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
499       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
500       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
502       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
503       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
504       if (i<n_vertices) {
505         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
506       }
507       if (pcbddc->switch_static || pcbddc->dbg_flag) {
508         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
511         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
513       }
514 
515       if (pcbddc->dbg_flag) {
516         /* assemble subdomain vector on nodes */
517         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
518         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
519         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
520         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
521         if (i<n_vertices) {
522           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
523         }
524         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
525         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
526         /* assemble subdomain vector of lagrange multipliers */
527         for (j=0;j<pcbddc->local_primal_size;j++) {
528           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
529         }
530         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
531         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
532         /* check saddle point solution */
533         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
534         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
535         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
536         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
537         /* shift by the identity matrix */
538         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
542       }
543     }
544     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     if (pcbddc->switch_static || pcbddc->dbg_flag) {
547       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549     }
550   }
551   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
552   /* Checking coarse_sub_mat and coarse basis functios */
553   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
554   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
555   if (pcbddc->dbg_flag) {
556     Mat         coarse_sub_mat;
557     Mat         AUXMAT,TM1,TM2,TM3,TM4;
558     Mat         coarse_phi_D,coarse_phi_B;
559     Mat         coarse_psi_D,coarse_psi_B;
560     Mat         A_II,A_BB,A_IB,A_BI;
561     MatType     checkmattype=MATSEQAIJ;
562     PetscReal   real_value;
563 
564     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
565     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
566     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
567     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
568     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
569     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
570     if (pcbddc->coarse_psi_B) {
571       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
572       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
573     }
574     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
575 
576     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
577     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
578     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
579     if (pcbddc->coarse_psi_B) {
580       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
581       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
583       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592     } else {
593       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
594       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
595       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601     }
602     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
604     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
605     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
606     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
607     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
608     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
612     for (i=0;i<pcbddc->local_primal_size;i++) {
613       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
614     }
615     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
616     for (i=0;i<pcbddc->local_primal_size;i++) {
617       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
618     }
619     if (pcbddc->coarse_psi_B) {
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
621       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
622         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
623       }
624       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
625       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
626         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
627       }
628     }
629     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
630     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
631     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
632     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
633     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
634     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
635     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
636     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
637     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
638     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
639     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
640     if (pcbddc->coarse_psi_B) {
641       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
642       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
643     }
644     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
645     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
646     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
647     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
648   }
649   /* free memory */
650   if (n_vertices) {
651     ierr = PetscFree(vertices);CHKERRQ(ierr);
652     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
653     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
654     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
655     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
656     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
657   }
658   if (n_constraints) {
659     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
660     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
661     ierr = MatDestroy(&M1);CHKERRQ(ierr);
662     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
663   }
664   ierr = PetscFree(auxindices);CHKERRQ(ierr);
665   /* get back data */
666   *coarse_submat_vals_n = coarse_submat_vals;
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
672 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
673 {
674   PC_IS*            pcis = (PC_IS*)(pc->data);
675   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
676   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
677   /* manage repeated solves */
678   MatReuse          reuse;
679   MatStructure      matstruct;
680   PetscErrorCode    ierr;
681 
682   PetscFunctionBegin;
683   /* get mat flags */
684   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
685   reuse = MAT_INITIAL_MATRIX;
686   if (pc->setupcalled) {
687     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
688     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
689     if (matstruct == SAME_NONZERO_PATTERN) {
690       reuse = MAT_REUSE_MATRIX;
691     } else {
692       reuse = MAT_INITIAL_MATRIX;
693     }
694   }
695   if (reuse == MAT_INITIAL_MATRIX) {
696     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
698     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
699     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
700     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
701   }
702 
703   /* transform local matrices if needed */
704   if (pcbddc->use_change_of_basis) {
705     Mat         change_mat_all;
706     PetscScalar *row_cmat_values;
707     PetscInt    *row_cmat_indices;
708     PetscInt    *nnz,*is_indices,*temp_indices;
709     PetscInt    i,j,k,n_D,n_B;
710 
711     /* Get Non-overlapping dimensions */
712     n_B = pcis->n_B;
713     n_D = pcis->n-n_B;
714 
715     /* compute nonzero structure of change of basis on all local nodes */
716     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
717     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
718     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
719     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
720     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
721     k=1;
722     for (i=0;i<n_B;i++) {
723       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
724       nnz[is_indices[i]]=j;
725       if (k < j) k = j;
726       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
727     }
728     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     /* assemble change of basis matrix on the whole set of local dofs */
730     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
731     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
732     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
733     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
734     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
735     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
736     for (i=0;i<n_D;i++) {
737       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
738     }
739     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
743       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
744       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
746     }
747     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
748     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
749     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
750     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
751     if (i==1) {
752       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
753     } else {
754       Mat work_mat;
755       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
756       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
757       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
758     }
759     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
760     ierr = PetscFree(nnz);CHKERRQ(ierr);
761     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
762   } else {
763     /* without change of basis, the local matrix is unchanged */
764     if (!pcbddc->local_mat) {
765       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
766       pcbddc->local_mat = matis->A;
767     }
768   }
769 
770   /* get submatrices */
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
772   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
773   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
774   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
780 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
781 {
782   PC_IS*         pcis = (PC_IS*)(pc->data);
783   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
784   IS             is_aux1,is_aux2;
785   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
786   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
787   PetscBT        bitmask;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* Set Non-overlapping dimensions */
792   n_B = pcis->n_B; n_D = pcis->n - n_B;
793   /* get vertex indices from constraint matrix */
794   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
795   /* Set number of constraints */
796   n_constraints = pcbddc->local_primal_size-n_vertices;
797   /* create auxiliary bitmask */
798   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
799   for (i=0;i<n_vertices;i++) {
800     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
801   }
802   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
803   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
804   for (i=0, n_R=0; i<pcis->n; i++) {
805     if (!PetscBTLookup(bitmask,i)) {
806       idx_R_local[n_R] = i;
807       n_R++;
808     }
809   }
810   ierr = PetscFree(vertices);CHKERRQ(ierr);
811   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
812 
813   /* print some info if requested */
814   if (pcbddc->dbg_flag) {
815     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
816     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
817     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
818     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
820     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
821     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
822   }
823 
824   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
825   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
826   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
827   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
828   for (i=0; i<n_D; i++) {
829     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
830   }
831   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
832   for (i=0, j=0; i<n_R; i++) {
833     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
834       aux_array1[j++] = i;
835     }
836   }
837   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
838   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
839   for (i=0, j=0; i<n_B; i++) {
840     if (!PetscBTLookup(bitmask,is_indices[i])) {
841       aux_array2[j++] = i;
842     }
843   }
844   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
845   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
846   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
847   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
849 
850   if (pcbddc->switch_static || pcbddc->dbg_flag) {
851     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
852     for (i=0, j=0; i<n_R; i++) {
853       if (PetscBTLookup(bitmask,idx_R_local[i])) {
854         aux_array1[j++] = i;
855       }
856     }
857     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
858     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
859     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
860   }
861   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
867 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
868 {
869   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
870 
871   PetscFunctionBegin;
872   pcbddc->use_exact_dirichlet=use;
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
878 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
879 {
880   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
881   PC_IS          *pcis = (PC_IS*)pc->data;
882   PC             pc_temp;
883   Mat            A_RR;
884   Vec            vec1,vec2,vec3;
885   MatStructure   matstruct;
886   PetscScalar    m_one = -1.0;
887   PetscReal      value;
888   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
889   PetscErrorCode ierr;
890 
891   PetscFunctionBegin;
892   /* Creating PC contexts for local Dirichlet and Neumann problems */
893   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
894 
895   /* DIRICHLET PROBLEM */
896   /* Matrix for Dirichlet problem is pcis->A_II */
897   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
898   if (!pcbddc->ksp_D) { /* create object if not yet build */
899     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
900     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
901     /* default */
902     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
903     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
904     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
905     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
906     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
907   }
908   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
909   /* Allow user's customization */
910   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
911   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
912   if (!n_D) {
913     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
914     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
915   }
916   /* Set Up KSP for Dirichlet problem of BDDC */
917   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
918   /* set ksp_D into pcis data */
919   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
920   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
921   pcis->ksp_D = pcbddc->ksp_D;
922 
923   /* NEUMANN PROBLEM */
924   /* Matrix for Neumann problem is A_RR -> we need to create it */
925   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
926   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
927   if (!pcbddc->ksp_R) { /* create object if not yet build */
928     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
929     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
930     /* default */
931     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
932     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
933     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
934     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
935     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
936   }
937   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
938   /* Allow user's customization */
939   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
940   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
941   if (!n_R) {
942     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
943     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
944   }
945   /* Set Up KSP for Neumann problem of BDDC */
946   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
947 
948   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
949 
950   /* Dirichlet */
951   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
952   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
953   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
954   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
955   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
956   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
957   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
958   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
959   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
960   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
961   /* need to be adapted? */
962   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
963   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
964   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
965   /* print info */
966   if (pcbddc->dbg_flag) {
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
969     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
970     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
971     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
972   }
973   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
974     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
975   }
976 
977   /* Neumann */
978   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
979   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
980   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
981   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
982   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
983   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
984   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
985   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
986   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
987   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
988   /* need to be adapted? */
989   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
990   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
991   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
992   /* print info */
993   if (pcbddc->dbg_flag) {
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
995     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
996   }
997   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
998     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
999   }
1000 
1001   /* free Neumann problem's matrix */
1002   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1008 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1009 {
1010   PetscErrorCode ierr;
1011   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1012 
1013   PetscFunctionBegin;
1014   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1015   if (pcbddc->local_auxmat1) {
1016     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1017     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1018   }
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1024 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1025 {
1026   PetscErrorCode ierr;
1027   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1028   PC_IS*            pcis = (PC_IS*)  (pc->data);
1029   const PetscScalar zero = 0.0;
1030 
1031   PetscFunctionBegin;
1032   /* Application of PHI^T (or PSI^T)  */
1033   if (pcbddc->coarse_psi_B) {
1034     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1035     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1036   } else {
1037     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1038     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1039   }
1040   /* Scatter data of coarse_rhs */
1041   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1042   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043 
1044   /* Local solution on R nodes */
1045   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1046   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1048   if (pcbddc->switch_static) {
1049     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   }
1052   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1053   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1054   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056   if (pcbddc->switch_static) {
1057     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1059   }
1060 
1061   /* Coarse solution */
1062   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1064     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1065   }
1066   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068 
1069   /* Sum contributions from two levels */
1070   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1071   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1077 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1078 {
1079   PetscErrorCode ierr;
1080   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1081 
1082   PetscFunctionBegin;
1083   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1089 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1090 {
1091   PetscErrorCode ierr;
1092   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1093 
1094   PetscFunctionBegin;
1095   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /* uncomment for testing purposes */
1100 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1101 #undef __FUNCT__
1102 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1103 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1104 {
1105   PetscErrorCode    ierr;
1106   PC_IS*            pcis = (PC_IS*)(pc->data);
1107   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1108   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1109   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1110   MatType           impMatType=MATSEQAIJ;
1111   /* one and zero */
1112   PetscScalar       one=1.0,zero=0.0;
1113   /* space to store constraints and their local indices */
1114   PetscScalar       *temp_quadrature_constraint;
1115   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1116   /* iterators */
1117   PetscInt          i,j,k,total_counts,temp_start_ptr;
1118   /* stuff to store connected components stored in pcbddc->mat_graph */
1119   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1120   PetscInt          n_ISForFaces,n_ISForEdges;
1121   /* near null space stuff */
1122   MatNullSpace      nearnullsp;
1123   const Vec         *nearnullvecs;
1124   Vec               *localnearnullsp;
1125   PetscBool         nnsp_has_cnst;
1126   PetscInt          nnsp_size;
1127   PetscScalar       *array;
1128   /* BLAS integers */
1129   PetscBLASInt      lwork,lierr;
1130   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1131   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1132   /* LAPACK working arrays for SVD or POD */
1133   PetscBool         skip_lapack;
1134   PetscScalar       *work;
1135   PetscReal         *singular_vals;
1136 #if defined(PETSC_USE_COMPLEX)
1137   PetscReal         *rwork;
1138 #endif
1139 #if defined(PETSC_MISSING_LAPACK_GESVD)
1140   PetscBLASInt      Blas_one_2=1;
1141   PetscScalar       *temp_basis,*correlation_mat;
1142 #else
1143   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1144   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1145 #endif
1146   /* change of basis */
1147   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1148   PetscBool         boolforchange;
1149   PetscBT           touched,change_basis;
1150   /* auxiliary stuff */
1151   PetscInt          *nnz,*is_indices,*local_to_B;
1152   /* some quantities */
1153   PetscInt          n_vertices,total_primal_vertices;
1154   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1155 
1156 
1157   PetscFunctionBegin;
1158   /* Get index sets for faces, edges and vertices from graph */
1159   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1160     pcbddc->use_vertices = PETSC_TRUE;
1161   }
1162   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1163   /* print some info */
1164   if (pcbddc->dbg_flag) {
1165     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1166     i = 0;
1167     if (ISForVertices) {
1168       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1169     }
1170     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1171     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1172     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1173     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1174   }
1175   /* check if near null space is attached to global mat */
1176   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1177   if (nearnullsp) {
1178     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1179   } else { /* if near null space is not provided BDDC uses constants by default */
1180     nnsp_size = 0;
1181     nnsp_has_cnst = PETSC_TRUE;
1182   }
1183   /* get max number of constraints on a single cc */
1184   max_constraints = nnsp_size;
1185   if (nnsp_has_cnst) max_constraints++;
1186 
1187   /*
1188        Evaluate maximum storage size needed by the procedure
1189        - temp_indices will contain start index of each constraint stored as follows
1190        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1191        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1192        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1193                                                                                                                                                          */
1194   total_counts = n_ISForFaces+n_ISForEdges;
1195   total_counts *= max_constraints;
1196   n_vertices = 0;
1197   if (ISForVertices) {
1198     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1199   }
1200   total_counts += n_vertices;
1201   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1202   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1203   total_counts = 0;
1204   max_size_of_constraint = 0;
1205   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1206     if (i<n_ISForEdges) {
1207       used_IS = &ISForEdges[i];
1208     } else {
1209       used_IS = &ISForFaces[i-n_ISForEdges];
1210     }
1211     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1212     total_counts += j;
1213     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1214   }
1215   total_counts *= max_constraints;
1216   total_counts += n_vertices;
1217   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1218   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1219   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1220   /* local to boundary numbering */
1221   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1222   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1223   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1224   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1225   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1226   /* get local part of global near null space vectors */
1227   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1228   for (k=0;k<nnsp_size;k++) {
1229     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1230     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   }
1233 
1234   /* whether or not to skip lapack calls */
1235   skip_lapack = PETSC_TRUE;
1236   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1237 
1238   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1239   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1240     PetscScalar temp_work;
1241 #if defined(PETSC_MISSING_LAPACK_GESVD)
1242     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1243     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1244     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1245     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1246 #if defined(PETSC_USE_COMPLEX)
1247     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1248 #endif
1249     /* now we evaluate the optimal workspace using query with lwork=-1 */
1250     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1251     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1252     lwork = -1;
1253     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1254 #if !defined(PETSC_USE_COMPLEX)
1255     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1256 #else
1257     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1258 #endif
1259     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1260     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1261 #else /* on missing GESVD */
1262     /* SVD */
1263     PetscInt max_n,min_n;
1264     max_n = max_size_of_constraint;
1265     min_n = max_constraints;
1266     if (max_size_of_constraint < max_constraints) {
1267       min_n = max_size_of_constraint;
1268       max_n = max_constraints;
1269     }
1270     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1271 #if defined(PETSC_USE_COMPLEX)
1272     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1273 #endif
1274     /* now we evaluate the optimal workspace using query with lwork=-1 */
1275     lwork = -1;
1276     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1277     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1278     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1279     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1280 #if !defined(PETSC_USE_COMPLEX)
1281     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1282 #else
1283     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1284 #endif
1285     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1286     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1287 #endif /* on missing GESVD */
1288     /* Allocate optimal workspace */
1289     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1290     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1291   }
1292   /* Now we can loop on constraining sets */
1293   total_counts = 0;
1294   temp_indices[0] = 0;
1295   /* vertices */
1296   if (ISForVertices) {
1297     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298     if (nnsp_has_cnst) { /* consider all vertices */
1299       for (i=0;i<n_vertices;i++) {
1300         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1301         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1302         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1303         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1304         total_counts++;
1305       }
1306     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1307       PetscBool used_vertex;
1308       for (i=0;i<n_vertices;i++) {
1309         used_vertex = PETSC_FALSE;
1310         k = 0;
1311         while (!used_vertex && k<nnsp_size) {
1312           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1313           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1314             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1315             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1316             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1317             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1318             total_counts++;
1319             used_vertex = PETSC_TRUE;
1320           }
1321           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1322           k++;
1323         }
1324       }
1325     }
1326     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1327     n_vertices = total_counts;
1328   }
1329 
1330   /* edges and faces */
1331   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1332     if (i<n_ISForEdges) {
1333       used_IS = &ISForEdges[i];
1334       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1335     } else {
1336       used_IS = &ISForFaces[i-n_ISForEdges];
1337       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1338     }
1339     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1340     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1341     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1342     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1343     /* change of basis should not be performed on local periodic nodes */
1344     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1345     if (nnsp_has_cnst) {
1346       PetscScalar quad_value;
1347       temp_constraints++;
1348       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1349       for (j=0;j<size_of_constraint;j++) {
1350         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1351         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1352         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1353       }
1354       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1355       if (boolforchange) {
1356         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1357       }
1358       total_counts++;
1359     }
1360     for (k=0;k<nnsp_size;k++) {
1361       PetscReal real_value;
1362       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1363       for (j=0;j<size_of_constraint;j++) {
1364         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1365         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1366         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1367       }
1368       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1369       /* check if array is null on the connected component */
1370       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1371       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1372       if (real_value > 0.0) { /* keep indices and values */
1373         temp_constraints++;
1374         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1375         if (boolforchange) {
1376           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1377         }
1378         total_counts++;
1379       }
1380     }
1381     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1382     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1383     if (!pcbddc->use_nnsp_true) {
1384       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1385 
1386 #if defined(PETSC_MISSING_LAPACK_GESVD)
1387       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1388          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1389          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1390             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1391             from that computed using LAPACKgesvd
1392          -> This is due to a different computation of eigenvectors in LAPACKheev
1393          -> The quality of the POD-computed basis will be the same */
1394       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1395       /* Store upper triangular part of correlation matrix */
1396       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1397       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1398       for (j=0;j<temp_constraints;j++) {
1399         for (k=0;k<j+1;k++) {
1400           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1401         }
1402       }
1403       /* compute eigenvalues and eigenvectors of correlation matrix */
1404       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1405       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1406 #if !defined(PETSC_USE_COMPLEX)
1407       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1408 #else
1409       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1410 #endif
1411       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1412       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1413       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1414       j=0;
1415       while (j < temp_constraints && singular_vals[j] < tol) j++;
1416       total_counts=total_counts-j;
1417       /* scale and copy POD basis into used quadrature memory */
1418       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1419       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1420       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1421       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1422       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1423       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1424       if (j<temp_constraints) {
1425         PetscInt ii;
1426         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1427         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1428         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1429         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1430         for (k=0;k<temp_constraints-j;k++) {
1431           for (ii=0;ii<size_of_constraint;ii++) {
1432             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1433           }
1434         }
1435       }
1436 #else  /* on missing GESVD */
1437       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1438       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1439       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1440       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1441 #if !defined(PETSC_USE_COMPLEX)
1442       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1443 #else
1444       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1445 #endif
1446       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1447       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1448       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1449       k = temp_constraints;
1450       if (k > size_of_constraint) k = size_of_constraint;
1451       j = 0;
1452       while (j < k && singular_vals[k-j-1] < tol) j++;
1453       total_counts = total_counts-temp_constraints+k-j;
1454 #endif /* on missing GESVD */
1455     }
1456   }
1457   /* free index sets of faces, edges and vertices */
1458   for (i=0;i<n_ISForFaces;i++) {
1459     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1460   }
1461   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1462   for (i=0;i<n_ISForEdges;i++) {
1463     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1464   }
1465   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1466   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1467 
1468   /* free workspace */
1469   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1470     ierr = PetscFree(work);CHKERRQ(ierr);
1471 #if defined(PETSC_USE_COMPLEX)
1472     ierr = PetscFree(rwork);CHKERRQ(ierr);
1473 #endif
1474     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1475 #if defined(PETSC_MISSING_LAPACK_GESVD)
1476     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1477     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1478 #endif
1479   }
1480   for (k=0;k<nnsp_size;k++) {
1481     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1482   }
1483   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1484 
1485   /* set quantities in pcbddc data structure */
1486   /* n_vertices defines the number of subdomain corners in the primal space */
1487   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1488   pcbddc->local_primal_size = total_counts;
1489   pcbddc->n_vertices = n_vertices;
1490   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1491 
1492   /* Create constraint matrix */
1493   /* The constraint matrix is used to compute the l2g map of primal dofs */
1494   /* so we need to set it up properly either with or without change of basis */
1495   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1496   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1497   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1498   /* array to compute a local numbering of constraints : vertices first then constraints */
1499   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1500   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1501   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1502   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1503   /* auxiliary stuff for basis change */
1504   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1505   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1506 
1507   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1508   total_primal_vertices=0;
1509   for (i=0;i<pcbddc->local_primal_size;i++) {
1510     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1511     if (size_of_constraint == 1) {
1512       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1513       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1514       aux_primal_minloc[total_primal_vertices]=0;
1515       total_primal_vertices++;
1516     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1517       PetscInt min_loc,min_index;
1518       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1519       /* find first untouched local node */
1520       k = 0;
1521       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1522       min_index = global_indices[k];
1523       min_loc = k;
1524       /* search the minimum among global nodes already untouched on the cc */
1525       for (k=1;k<size_of_constraint;k++) {
1526         /* there can be more than one constraint on a single connected component */
1527         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1528           min_index = global_indices[k];
1529           min_loc = k;
1530         }
1531       }
1532       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1533       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1534       aux_primal_minloc[total_primal_vertices]=min_loc;
1535       total_primal_vertices++;
1536     }
1537   }
1538   /* free workspace */
1539   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1540   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1541   /* permute indices in order to have a sorted set of vertices */
1542   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1543 
1544   /* nonzero structure of constraint matrix */
1545   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1546   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1547   j=total_primal_vertices;
1548   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1549     if (!PetscBTLookup(change_basis,i)) {
1550       nnz[j]=temp_indices[i+1]-temp_indices[i];
1551       j++;
1552     }
1553   }
1554   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1555   ierr = PetscFree(nnz);CHKERRQ(ierr);
1556   /* set values in constraint matrix */
1557   for (i=0;i<total_primal_vertices;i++) {
1558     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1559   }
1560   total_counts = total_primal_vertices;
1561   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1562     if (!PetscBTLookup(change_basis,i)) {
1563       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1564       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1565       total_counts++;
1566     }
1567   }
1568   /* assembling */
1569   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1570   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1571   /*
1572   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1573   */
1574   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1575   if (pcbddc->use_change_of_basis) {
1576     PetscBool qr_needed = PETSC_FALSE;
1577     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1578     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1579     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1580     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1581     /* work arrays */
1582     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1583     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1584     /* nonzeros per row */
1585     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1586       if (PetscBTLookup(change_basis,i)) {
1587         qr_needed = PETSC_TRUE;
1588         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1589         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1590       }
1591     }
1592     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1593     ierr = PetscFree(nnz);CHKERRQ(ierr);
1594     /* Set initial identity in the matrix */
1595     for (i=0;i<pcis->n_B;i++) {
1596       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1597     }
1598 
1599     /* Now we loop on the constraints which need a change of basis */
1600     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1601     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1602     /* Change of basis matrix T computed via QR decomposition of constraints */
1603     if (qr_needed) {
1604       /* dual and primal dofs on a single cc */
1605       PetscInt     dual_dofs,primal_dofs;
1606       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1607       PetscInt     primal_counter;
1608       /* working stuff for GEQRF */
1609       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1610       PetscBLASInt lqr_work;
1611       /* working stuff for UNGQR */
1612       PetscScalar  *gqr_work,lgqr_work_t;
1613       PetscBLASInt lgqr_work;
1614       /* working stuff for TRTRS */
1615       PetscScalar  *trs_rhs;
1616       PetscBLASInt Blas_NRHS;
1617       /* pointers for values insertion into change of basis matrix */
1618       PetscInt     *start_rows,*start_cols;
1619       PetscScalar  *start_vals;
1620       /* working stuff for values insertion */
1621       PetscBT      is_primal;
1622 
1623       /* space to store Q */
1624       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1625       /* first we issue queries for optimal work */
1626       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1627       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1628       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1629       lqr_work = -1;
1630       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1631       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1632       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1633       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1634       lgqr_work = -1;
1635       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1636       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1637       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1638       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1639       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1640       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1641       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1642       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1643       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1644       /* array to store scaling factors for reflectors */
1645       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1646       /* array to store rhs and solution of triangular solver */
1647       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1648       /* array to store whether a node is primal or not */
1649       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1650       for (i=0;i<total_primal_vertices;i++) {
1651         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1652       }
1653 
1654       /* allocating workspace for check */
1655       if (pcbddc->dbg_flag) {
1656         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1657         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1658         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1659       }
1660 
1661       /* loop on constraints and see whether or not they need a change of basis */
1662       /* -> using implicit ordering contained in temp_indices data */
1663       total_counts = pcbddc->n_vertices;
1664       primal_counter = total_counts;
1665       while (total_counts<pcbddc->local_primal_size) {
1666         primal_dofs = 1;
1667         if (PetscBTLookup(change_basis,total_counts)) {
1668           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1669           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1670             primal_dofs++;
1671           }
1672           /* get constraint info */
1673           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1674           dual_dofs = size_of_constraint-primal_dofs;
1675 
1676           /* copy quadrature constraints for change of basis check */
1677           if (pcbddc->dbg_flag) {
1678             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1679             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1680           }
1681 
1682           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1683           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1684 
1685           /* compute QR decomposition of constraints */
1686           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1687           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1688           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1689           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1690           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1691           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1692           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1693 
1694           /* explictly compute R^-T */
1695           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1696           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1697           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1698           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1699           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1700           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1701           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1702           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1703           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1704           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1705 
1706           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1709           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1710           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1711           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1712           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1713           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1714           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1715 
1716           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1717              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1718              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1719           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1720           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1721           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1722           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1723           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1724           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1725           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1726           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1727           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1728           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1729 
1730           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1731           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1732           /* insert cols for primal dofs */
1733           for (j=0;j<primal_dofs;j++) {
1734             start_vals = &qr_basis[j*size_of_constraint];
1735             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1736             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1737           }
1738           /* insert cols for dual dofs */
1739           for (j=0,k=0;j<dual_dofs;k++) {
1740             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1741               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1742               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1743               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1744               j++;
1745             }
1746           }
1747 
1748           /* check change of basis */
1749           if (pcbddc->dbg_flag) {
1750             PetscInt   ii,jj;
1751             PetscBool valid_qr=PETSC_TRUE;
1752             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1753             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1754             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1755             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1756             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1757             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1758             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1759             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1760             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1761             for (jj=0;jj<size_of_constraint;jj++) {
1762               for (ii=0;ii<primal_dofs;ii++) {
1763                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1764                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1765               }
1766             }
1767             if (!valid_qr) {
1768               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1769               for (jj=0;jj<size_of_constraint;jj++) {
1770                 for (ii=0;ii<primal_dofs;ii++) {
1771                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1772                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1773                   }
1774                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1775                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1776                   }
1777                 }
1778               }
1779             } else {
1780               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1781             }
1782           }
1783           /* increment primal counter */
1784           primal_counter += primal_dofs;
1785         } else {
1786           if (pcbddc->dbg_flag) {
1787             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1788           }
1789         }
1790         /* increment constraint counter total_counts */
1791         total_counts += primal_dofs;
1792       }
1793       if (pcbddc->dbg_flag) {
1794         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1795         ierr = PetscFree(work);CHKERRQ(ierr);
1796       }
1797       /* free workspace */
1798       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1799       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1800       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1801       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1802       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1803       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1804     }
1805     /* assembling */
1806     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1807     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1808     /*
1809     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1810     */
1811   }
1812   /* free workspace */
1813   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1814   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1815   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1816   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1817   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1818   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1819   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1820   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1826 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1827 {
1828   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1829   PC_IS       *pcis = (PC_IS*)pc->data;
1830   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1831   PetscInt    bs,ierr,i,vertex_size;
1832   PetscViewer viewer=pcbddc->dbg_viewer;
1833 
1834   PetscFunctionBegin;
1835   /* Init local Graph struct */
1836   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1837 
1838   /* Check validity of the csr graph passed in by the user */
1839   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1840     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1841   }
1842   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1843   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1844     Mat mat_adj;
1845     const PetscInt *xadj,*adjncy;
1846     PetscBool flg_row=PETSC_TRUE;
1847 
1848     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1849     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1850     if (!flg_row) {
1851       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1852     }
1853     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1854     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1855     if (!flg_row) {
1856       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1857     }
1858     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1859   }
1860 
1861   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1862   vertex_size = 1;
1863   if (!pcbddc->n_ISForDofs) {
1864     IS *custom_ISForDofs;
1865 
1866     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1867     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1868     for (i=0;i<bs;i++) {
1869       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1870     }
1871     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1872     /* remove my references to IS objects */
1873     for (i=0;i<bs;i++) {
1874       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1875     }
1876     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1877   } else { /* mat block size as vertex size (used for elasticity) */
1878     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1879   }
1880 
1881   /* Setup of Graph */
1882   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1883 
1884   /* Graph's connected components analysis */
1885   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1886 
1887   /* print some info to stdout */
1888   if (pcbddc->dbg_flag) {
1889     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1896 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1897 {
1898   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1899   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1900   PetscErrorCode ierr;
1901 
1902   PetscFunctionBegin;
1903   n = 0;
1904   vertices = 0;
1905   if (pcbddc->ConstraintMatrix) {
1906     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1907     for (i=0;i<local_primal_size;i++) {
1908       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1909       if (size_of_constraint == 1) n++;
1910       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1911     }
1912     if (vertices_idx) {
1913       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1914       n = 0;
1915       for (i=0;i<local_primal_size;i++) {
1916         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1917         if (size_of_constraint == 1) {
1918           vertices[n++]=row_cmat_indices[0];
1919         }
1920         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1921       }
1922     }
1923   }
1924   *n_vertices = n;
1925   if (vertices_idx) *vertices_idx = vertices;
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1931 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1932 {
1933   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1934   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1935   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1936   PetscBT        touched;
1937   PetscErrorCode ierr;
1938 
1939     /* This function assumes that the number of local constraints per connected component
1940        is not greater than the number of nodes defined for the connected component
1941        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1942   PetscFunctionBegin;
1943   n = 0;
1944   constraints_index = 0;
1945   if (pcbddc->ConstraintMatrix) {
1946     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1947     max_size_of_constraint = 0;
1948     for (i=0;i<local_primal_size;i++) {
1949       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1950       if (size_of_constraint > 1) {
1951         n++;
1952       }
1953       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1954       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1955     }
1956     if (constraints_idx) {
1957       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1958       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1959       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1960       n = 0;
1961       for (i=0;i<local_primal_size;i++) {
1962         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1963         if (size_of_constraint > 1) {
1964           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1965           /* find first untouched local node */
1966           j = 0;
1967           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1968           min_index = row_cmat_global_indices[j];
1969           min_loc = j;
1970           /* search the minimum among nodes not yet touched on the connected component
1971              since there can be more than one constraint on a single cc */
1972           for (j=1;j<size_of_constraint;j++) {
1973             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1974               min_index = row_cmat_global_indices[j];
1975               min_loc = j;
1976             }
1977           }
1978           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1979           constraints_index[n++] = row_cmat_indices[min_loc];
1980         }
1981         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1982       }
1983       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1984       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1985     }
1986   }
1987   *n_constraints = n;
1988   if (constraints_idx) *constraints_idx = constraints_index;
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 /* the next two functions has been adapted from pcis.c */
1993 #undef __FUNCT__
1994 #define __FUNCT__ "PCBDDCApplySchur"
1995 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
1996 {
1997   PetscErrorCode ierr;
1998   PC_IS          *pcis = (PC_IS*)(pc->data);
1999 
2000   PetscFunctionBegin;
2001   if (!vec2_B) { vec2_B = v; }
2002   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2003   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2004   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2005   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2006   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2012 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2013 {
2014   PetscErrorCode ierr;
2015   PC_IS          *pcis = (PC_IS*)(pc->data);
2016 
2017   PetscFunctionBegin;
2018   if (!vec2_B) { vec2_B = v; }
2019   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2020   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2021   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2022   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2023   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "PCBDDCSubsetNumbering"
2029 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2030 {
2031   Vec            local_vec,global_vec;
2032   IS             seqis,paris;
2033   VecScatter     scatter_ctx;
2034   PetscScalar    *array;
2035   PetscInt       *temp_global_dofs;
2036   PetscScalar    globalsum;
2037   PetscInt       i,j,s;
2038   PetscInt       nlocals,first_index,old_index,max_local;
2039   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2040   PetscMPIInt    *dof_sizes,*dof_displs;
2041   PetscBool      first_found;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   /* mpi buffers */
2046   MPI_Comm_size(comm,&size_prec_comm);
2047   MPI_Comm_rank(comm,&rank_prec_comm);
2048   j = ( !rank_prec_comm ? size_prec_comm : 0);
2049   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2050   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2051   /* get maximum size of subset */
2052   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2053   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2054   max_local = 0;
2055   if (n_local_dofs) {
2056     max_local = temp_global_dofs[0];
2057     for (i=1;i<n_local_dofs;i++) {
2058       if (max_local < temp_global_dofs[i] ) {
2059         max_local = temp_global_dofs[i];
2060       }
2061     }
2062   }
2063   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2064   max_global++;
2065   max_local = 0;
2066   if (n_local_dofs) {
2067     max_local = local_dofs[0];
2068     for (i=1;i<n_local_dofs;i++) {
2069       if (max_local < local_dofs[i] ) {
2070         max_local = local_dofs[i];
2071       }
2072     }
2073   }
2074   max_local++;
2075   /* allocate workspace */
2076   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2077   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2078   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2079   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2080   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2081   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2082   /* create scatter */
2083   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2084   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2085   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2086   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2087   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2088   /* init array */
2089   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2090   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2091   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2092   if (local_dofs_mult) {
2093     for (i=0;i<n_local_dofs;i++) {
2094       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2095     }
2096   } else {
2097     for (i=0;i<n_local_dofs;i++) {
2098       array[local_dofs[i]]=1.0;
2099     }
2100   }
2101   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2102   /* scatter into global vec and get total number of global dofs */
2103   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2105   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2106   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2107   /* Fill global_vec with cumulative function for global numbering */
2108   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2109   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2110   nlocals = 0;
2111   first_index = -1;
2112   first_found = PETSC_FALSE;
2113   for (i=0;i<s;i++) {
2114     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2115       first_found = PETSC_TRUE;
2116       first_index = i;
2117     }
2118     nlocals += (PetscInt)PetscRealPart(array[i]);
2119   }
2120   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2121   if (!rank_prec_comm) {
2122     dof_displs[0]=0;
2123     for (i=1;i<size_prec_comm;i++) {
2124       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2125     }
2126   }
2127   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2128   if (first_found) {
2129     array[first_index] += (PetscScalar)nlocals;
2130     old_index = first_index;
2131     for (i=first_index+1;i<s;i++) {
2132       if (PetscRealPart(array[i]) > 0.0) {
2133         array[i] += array[old_index];
2134         old_index = i;
2135       }
2136     }
2137   }
2138   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2139   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2140   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2141   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2142   /* get global ordering of local dofs */
2143   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2144   if (local_dofs_mult) {
2145     for (i=0;i<n_local_dofs;i++) {
2146       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2147     }
2148   } else {
2149     for (i=0;i<n_local_dofs;i++) {
2150       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2151     }
2152   }
2153   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2154   /* free workspace */
2155   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2156   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2157   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2158   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2159   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2160   /* return pointer to global ordering of local dofs */
2161   *global_numbering_subset = temp_global_dofs;
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2167 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2168 {
2169   PetscInt       i,j;
2170   PetscScalar    *alphas;
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   /* this implements stabilized Gram-Schmidt */
2175   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2176   for (i=0;i<n;i++) {
2177     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2178     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2179     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2180   }
2181   ierr = PetscFree(alphas);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatConvert_IS_AIJ"
2188 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2189 {
2190   Mat new_mat;
2191   Mat_IS *matis = (Mat_IS*)(mat->data);
2192   /* info on mat */
2193   PetscInt bs,rows,cols;
2194   PetscInt lrows,lcols;
2195   PetscInt local_rows,local_cols;
2196   PetscMPIInt nsubdomains,rank;
2197   /* preallocation */
2198   Vec vec_dnz,vec_onz;
2199   PetscScalar *my_dnz,*my_onz,*array;
2200   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2201   PetscInt  index_row,index_col,owner;
2202   PetscInt  *local_indices,*global_indices;
2203   /* work */
2204   PetscInt i,j;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   /* CHECKS */
2209   /* get info from mat */
2210   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2211   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2212   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2213   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2214   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2215 
2216   /* MAT_INITIAL_MATRIX */
2217   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2218   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2219   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2220   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2221   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2222   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2223 
2224   /*
2225     preallocation
2226   */
2227 
2228   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2229   /*
2230      Some vectors are needed to sum up properly on shared interface dofs.
2231      Note that preallocation is not exact, since it overestimates nonzeros
2232   */
2233 /*
2234   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2235   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2236   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2237   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2238 */
2239   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2240   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2241   /* All processes should compute entire row ownership */
2242   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2243   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2244   for (i=0;i<nsubdomains;i++) {
2245     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2246       row_ownership[j]=i;
2247     }
2248   }
2249   /* map indices of local mat to global values */
2250   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2251   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2252   for (i=0;i<local_rows;i++) local_indices[i]=i;
2253   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2254   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2255 
2256   /* my_dnz and my_onz contains exact contribution to preallocation from each local mat */
2257   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2258   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2259   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2260   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2261   for (i=0;i<local_rows;i++) {
2262     index_row = global_indices[i];
2263     for (j=i;j<local_rows;j++) {
2264       owner = row_ownership[index_row];
2265       index_col = global_indices[j];
2266       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2267         my_dnz[i] += 1.0;
2268       } else { /* offdiag block */
2269         my_onz[i] += 1.0;
2270       }
2271       /* same as before, interchanging rows and cols */
2272       if (i != j) {
2273         owner = row_ownership[index_col];
2274         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2275           my_dnz[j] += 1.0;
2276         } else {
2277           my_onz[j] += 1.0;
2278         }
2279       }
2280     }
2281   }
2282   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2283   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2284   if (local_rows) { /* multilevel guard */
2285     /* this way, preallocation is always sufficient */
2286     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2287     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2288   }
2289   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2290   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2291   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2292   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2293   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2294   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2295   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2296 
2297   /* set computed preallocation in dnz and onz */
2298   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2299   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2300   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2301   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2302   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2303   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2304   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2305   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2306 
2307   /* Resize preallocation if too much overstimated */
2308   for (i=0;i<lrows;i++) {
2309     dnz[i] = PetscMin(dnz[i],lcols);
2310     onz[i] = PetscMin(onz[i],cols-lcols);
2311   }
2312   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2313   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2314 
2315   /*
2316     SET VALUES. Very Basic.
2317   */
2318   for (i=0;i<local_rows;i++) {
2319     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2320     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2321     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2322     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2323     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2324   }
2325   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2327   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2328 
2329   /* get back mat */
2330   *M = new_mat;
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 
2335 
2336 /* BDDC requires metis 5.0.1 for multilevel */
2337 #if defined(PETSC_HAVE_METIS)
2338 #include "metis.h"
2339 #define MetisInt    idx_t
2340 #define MetisScalar real_t
2341 #endif
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2345 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2346 {
2347   PC_BDDC   *pcbddc = (PC_BDDC*)pc->data;
2348   PC_IS     *pcis = (PC_IS*)pc->data;
2349   Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
2350   ISLocalToGlobalMapping coarse_islg;
2351   IS  coarse_is;
2352 
2353   PetscInt  coarse_size;
2354 
2355   MatNullSpace CoarseNullSpace;
2356 
2357   PetscErrorCode ierr;
2358   PCType  coarse_pc_type;
2359   KSPType coarse_ksp_type;
2360   PC pc_temp;
2361 
2362   PetscInt      im_active,active_procs;
2363   PetscBool     setsym,issym=PETSC_FALSE;
2364   PetscInt      *local_primal_indices=0;
2365 
2366 
2367   PetscFunctionBegin;
2368 
2369   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2370 
2371   if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) {
2372     pcbddc->coarse_problem_type = PARALLEL_BDDC;
2373   }
2374 
2375   /* Assign global numbering to coarse dofs */
2376   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2377 
2378   im_active = 0;
2379   if (pcis->n) im_active = 1;
2380   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2381 
2382   /* adapt coarse problem type */
2383 #if defined(PETSC_HAVE_METIS)
2384   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2385     if (pcbddc->current_level < pcbddc->max_levels) {
2386       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2387         if (pcbddc->dbg_flag) {
2388           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2389          ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2390         }
2391         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2392       }
2393     } else {
2394       if (pcbddc->dbg_flag) {
2395         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2396         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2397       }
2398       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2399     }
2400   }
2401 #else
2402   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2403 #endif
2404   /* creates MATIS object for coarse matrix */
2405   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2406   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2407   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2408   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2409   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2410 
2411   if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2412     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2413     /* just for now */
2414     coarse_pc_type  = PCREDUNDANT;
2415     coarse_ksp_type = KSPPREONLY;
2416   } else {
2417   MPI_Comm  prec_comm;
2418   MPI_Comm  coarse_comm;
2419   PetscScalar *temp_coarse_mat_vals;
2420   PetscScalar *ins_coarse_mat_vals;
2421   PetscInt    *ins_local_primal_indices;
2422   PetscMPIInt *localsizes2,*localdispl2;
2423   PetscMPIInt size_prec_comm;
2424   PetscMPIInt rank_prec_comm;
2425   PetscMPIInt master_proc=0;
2426   PetscInt    ins_local_primal_size;
2427   PetscMPIInt *ranks_recv=0;
2428   PetscMPIInt count_recv=0;
2429   PetscMPIInt rank_coarse_proc_send_to=-1;
2430   PetscMPIInt coarse_color = MPI_UNDEFINED;
2431   ISLocalToGlobalMapping coarse_ISLG;
2432   PetscInt i,j,k;
2433   PetscInt      offset,offset2;
2434   PetscInt      *dnz;
2435   PetscInt      *replicated_local_primal_indices=0,*local_primal_displacements=0,*local_primal_sizes=0;
2436   PetscInt      replicated_primal_size=0;
2437   ins_local_primal_indices = 0;
2438   ins_coarse_mat_vals      = 0;
2439   localsizes2              = 0;
2440   localdispl2              = 0;
2441   temp_coarse_mat_vals     = 0;
2442   coarse_ISLG              = 0;
2443   IS coarse_IS;
2444   Mat matis_coarse_local_mat;
2445 
2446   /* OLD version from here */
2447 
2448   /* Construct needed data structures for message passing */
2449   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2450   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2451   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2452   j = size_prec_comm;
2453   ierr = PetscMalloc(j*sizeof(*local_primal_sizes),&local_primal_sizes);CHKERRQ(ierr);
2454   ierr = PetscMalloc(j*sizeof(*local_primal_displacements),&local_primal_displacements);CHKERRQ(ierr);
2455   /* Gather local_primal_size information for all processes  */
2456   ierr = MPI_Allgather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2457   replicated_primal_size = 0;
2458   for (i=0; i<j; i++) {
2459     local_primal_displacements[i] = replicated_primal_size ;
2460     replicated_primal_size += local_primal_sizes[i];
2461   }
2462 
2463 #if defined(PETSC_HAVE_METIS)
2464       /* we need additional variables */
2465       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2466       MetisInt    *metis_coarse_subdivision;
2467       MetisInt    options[METIS_NOPTIONS];
2468       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2469       PetscMPIInt procs_jumps_coarse_comm;
2470       PetscMPIInt *coarse_subdivision;
2471       PetscMPIInt *total_count_recv;
2472       PetscMPIInt *total_ranks_recv;
2473       PetscMPIInt *displacements_recv;
2474       PetscMPIInt *my_faces_connectivity;
2475       PetscMPIInt *petsc_faces_adjncy;
2476       MetisInt    *faces_adjncy;
2477       MetisInt    *faces_xadj;
2478       PetscMPIInt *number_of_faces;
2479       PetscMPIInt *faces_displacements;
2480       PetscInt    *array_int;
2481       PetscMPIInt my_faces=0;
2482       PetscMPIInt total_faces=0;
2483       PetscInt    ranks_stretching_ratio;
2484 
2485       /* define some quantities */
2486       coarse_pc_type  = PCBDDC;
2487       coarse_ksp_type = KSPRICHARDSON;
2488 
2489       /* details of coarse decomposition */
2490       n_subdomains = active_procs;
2491       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2492       ranks_stretching_ratio = size_prec_comm/active_procs;
2493       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2494 
2495 #if 0
2496       PetscMPIInt *old_ranks;
2497       PetscInt    *new_ranks,*jj,*ii;
2498       MatPartitioning mat_part;
2499       IS coarse_new_decomposition,is_numbering;
2500       PetscViewer viewer_test;
2501       MPI_Comm    test_coarse_comm;
2502       PetscMPIInt test_coarse_color;
2503       Mat         mat_adj;
2504       /* Create new communicator for coarse problem splitting the old one */
2505       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2506          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2507       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2508       test_coarse_comm = MPI_COMM_NULL;
2509       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2510       if (im_active) {
2511         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2512         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2513         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2514         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2515         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2516         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2517         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2518         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2519         k = pcis->n_neigh-1;
2520         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2521         ii[0]=0;
2522         ii[1]=k;
2523         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2524         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2525         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2526         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2527         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2528         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2529         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2530         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2531         printf("Setting Nparts %d\n",n_parts);
2532         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2533         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2534         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2535         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2536         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2537         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2538         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2539         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2540         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2541         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2542         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2543         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2544         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2545       }
2546 #endif
2547 
2548       /* build CSR graph of subdomains' connectivity */
2549       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2550       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2551       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2552         for (j=0;j<pcis->n_shared[i];j++){
2553           array_int[ pcis->shared[i][j] ]+=1;
2554         }
2555       }
2556       for (i=1;i<pcis->n_neigh;i++){
2557         for (j=0;j<pcis->n_shared[i];j++){
2558           if (array_int[ pcis->shared[i][j] ] > 0 ){
2559             my_faces++;
2560             break;
2561           }
2562         }
2563       }
2564 
2565       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2566       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2567       my_faces=0;
2568       for (i=1;i<pcis->n_neigh;i++){
2569         for (j=0;j<pcis->n_shared[i];j++){
2570           if (array_int[ pcis->shared[i][j] ] > 0 ){
2571             my_faces_connectivity[my_faces]=pcis->neigh[i];
2572             my_faces++;
2573             break;
2574           }
2575         }
2576       }
2577       if (rank_prec_comm == master_proc) {
2578         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2579         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2580         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2581         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2582         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2583       }
2584       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2585       if (rank_prec_comm == master_proc) {
2586         faces_xadj[0]=0;
2587         faces_displacements[0]=0;
2588         j=0;
2589         for (i=1;i<size_prec_comm+1;i++) {
2590           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2591           if (number_of_faces[i-1]) {
2592             j++;
2593             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2594           }
2595         }
2596       }
2597       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2598       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2599       ierr = PetscFree(array_int);CHKERRQ(ierr);
2600       if (rank_prec_comm == master_proc) {
2601         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2602         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2603         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2604         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2605       }
2606 
2607       if ( rank_prec_comm == master_proc ) {
2608 
2609         PetscInt heuristic_for_metis=3;
2610 
2611         ncon=1;
2612         faces_nvtxs=n_subdomains;
2613         /* partition graoh induced by face connectivity */
2614         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2615         ierr = METIS_SetDefaultOptions(options);
2616         /* we need a contiguous partition of the coarse mesh */
2617         options[METIS_OPTION_CONTIG]=1;
2618         options[METIS_OPTION_NITER]=30;
2619         if (pcbddc->coarsening_ratio > 1) {
2620           if (n_subdomains>n_parts*heuristic_for_metis) {
2621             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2622             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2623             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2624             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2625           } else {
2626             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2627             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2628           }
2629         } else {
2630           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2631         }
2632         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2633         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2634         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2635 
2636         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2637         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2638         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2639         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2640       }
2641 
2642       /* Create new communicator for coarse problem splitting the old one */
2643       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2644         coarse_color=0;              /* for communicator splitting */
2645       }
2646       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2647          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2648       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2649 
2650       if ( coarse_color == 0 ) {
2651         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2652         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2653       } else {
2654         rank_coarse_comm = MPI_PROC_NULL;
2655       }
2656 
2657       /* master proc take care of arranging and distributing coarse information */
2658       if (rank_coarse_comm == master_proc) {
2659         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2660         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2661         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2662         /* some initializations */
2663         displacements_recv[0]=0;
2664         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2665         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2666         for (j=0;j<size_coarse_comm;j++) {
2667           for (i=0;i<size_prec_comm;i++) {
2668           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2669           }
2670         }
2671         /* displacements needed for scatterv of total_ranks_recv */
2672       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2673 
2674         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2675         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2676         for (j=0;j<size_coarse_comm;j++) {
2677           for (i=0;i<size_prec_comm;i++) {
2678             if (coarse_subdivision[i]==j) {
2679               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2680               total_count_recv[j]+=1;
2681             }
2682           }
2683         }
2684         /*for (j=0;j<size_coarse_comm;j++) {
2685           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2686           for (i=0;i<total_count_recv[j];i++) {
2687             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2688           }
2689           printf("\n");
2690         }*/
2691 
2692         /* identify new decomposition in terms of ranks in the old communicator */
2693         for (i=0;i<n_subdomains;i++) {
2694           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2695         }
2696         /*printf("coarse_subdivision in old end new ranks\n");
2697         for (i=0;i<size_prec_comm;i++)
2698           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2699             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2700           } else {
2701             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2702           }
2703         printf("\n");*/
2704       }
2705 
2706       /* Scatter new decomposition for send details */
2707       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2708       /* Scatter receiving details to members of coarse decomposition */
2709       if ( coarse_color == 0) {
2710         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2711         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2712         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2713       }
2714 
2715       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2716       if (coarse_color == 0) {
2717         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2718         for (i=0;i<count_recv;i++)
2719           printf("%d ",ranks_recv[i]);
2720         printf("\n");
2721       }*/
2722 
2723       if (rank_prec_comm == master_proc) {
2724         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2725         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2726         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2727         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2728       }
2729 #endif
2730 
2731 
2732           if(pcbddc->coarsening_ratio == 1) {
2733             ins_local_primal_size = pcbddc->local_primal_size;
2734             ins_local_primal_indices = local_primal_indices;
2735             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2736             /* nonzeros */
2737             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2738             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2739             for (i=0;i<ins_local_primal_size;i++) {
2740               dnz[i] = ins_local_primal_size;
2741             }
2742           } else {
2743             PetscMPIInt send_size;
2744             PetscMPIInt *send_buffer;
2745             PetscInt    *aux_ins_indices;
2746             PetscInt    ii,jj;
2747             MPI_Request *requests;
2748 
2749             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2750             /* reusing local_primal_displacements and replicated_primal_size */
2751             ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
2752             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&local_primal_displacements);CHKERRQ(ierr);
2753             replicated_primal_size = count_recv;
2754             j = 0;
2755             for (i=0;i<count_recv;i++) {
2756               local_primal_displacements[i] = j;
2757               j += local_primal_sizes[ranks_recv[i]];
2758             }
2759             local_primal_displacements[count_recv] = j;
2760             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&replicated_local_primal_indices);CHKERRQ(ierr);
2761             /* allocate auxiliary space */
2762             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2763             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2764             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2765             /* allocate stuffs for message massing */
2766             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2767             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2768             /* send indices to be inserted */
2769             for (i=0;i<count_recv;i++) {
2770               send_size = local_primal_sizes[ranks_recv[i]];
2771               ierr = MPI_Irecv(&replicated_local_primal_indices[local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2772             }
2773             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2774               send_size = pcbddc->local_primal_size;
2775               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2776               for (i=0;i<send_size;i++) {
2777                 send_buffer[i]=(PetscMPIInt)local_primal_indices[i];
2778               }
2779               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2780             }
2781             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2782             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2783               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2784             }
2785             j = 0;
2786             for (i=0;i<count_recv;i++) {
2787               ii = local_primal_displacements[i+1]-local_primal_displacements[i];
2788               localsizes2[i] = ii*ii;
2789               localdispl2[i] = j;
2790               j += localsizes2[i];
2791               jj = local_primal_displacements[i];
2792               /* it counts the coarse subdomains sharing the coarse node */
2793               for (k=0;k<ii;k++) {
2794                 aux_ins_indices[replicated_local_primal_indices[jj+k]] += 1;
2795               }
2796             }
2797             /* temp_coarse_mat_vals used to store matrix values to be received */
2798             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2799             /* evaluate how many values I will insert in coarse mat */
2800             ins_local_primal_size = 0;
2801             for (i=0;i<coarse_size;i++) {
2802               if (aux_ins_indices[i]) {
2803                 ins_local_primal_size++;
2804               }
2805             }
2806             /* evaluate indices I will insert in coarse mat */
2807             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2808             j = 0;
2809             for(i=0;i<coarse_size;i++) {
2810               if(aux_ins_indices[i]) {
2811                 ins_local_primal_indices[j] = i;
2812                 j++;
2813               }
2814             }
2815             /* processes partecipating in coarse problem receive matrix data from their friends */
2816             for (i=0;i<count_recv;i++) {
2817               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2818             }
2819             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2820               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2821               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2822             }
2823             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2824             /* nonzeros */
2825             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2826             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2827             /* use aux_ins_indices to realize a global to local mapping */
2828             j=0;
2829             for(i=0;i<coarse_size;i++){
2830               if(aux_ins_indices[i]==0){
2831                 aux_ins_indices[i]=-1;
2832               } else {
2833                 aux_ins_indices[i]=j;
2834                 j++;
2835               }
2836             }
2837             for (i=0;i<count_recv;i++) {
2838               j = local_primal_sizes[ranks_recv[i]];
2839               for (k=0;k<j;k++) {
2840                 dnz[aux_ins_indices[replicated_local_primal_indices[local_primal_displacements[i]+k]]] += j;
2841               }
2842             }
2843             /* check */
2844             for (i=0;i<ins_local_primal_size;i++) {
2845               if (dnz[i] > ins_local_primal_size) {
2846                 dnz[i] = ins_local_primal_size;
2847               }
2848             }
2849             ierr = PetscFree(requests);CHKERRQ(ierr);
2850             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2851             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2852           }
2853           /* create local to global mapping needed by coarse MATIS */
2854           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2855           coarse_comm = prec_comm;
2856           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2857           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2858           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2859   /* Now create and fill up coarse matrix */
2860 
2861 
2862 
2863       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&coarse_mat);CHKERRQ(ierr);
2864       ierr = MatSetUp(coarse_mat);CHKERRQ(ierr);
2865       ierr = MatISGetLocalMat(coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2866       ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr);
2867       ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr);
2868       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2869       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2870       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2871       /* preallocation */
2872       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
2873       ierr = PetscFree(dnz);CHKERRQ(ierr);
2874       /* insert values */
2875       if (pcbddc->coarsening_ratio == 1) {
2876         ins_coarse_mat_vals = coarse_submat_vals;
2877         ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2878       } else {
2879         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
2880         for (k=0;k<replicated_primal_size;k++) {
2881           offset = local_primal_displacements[k];
2882           offset2 = localdispl2[k];
2883           ins_local_primal_size = local_primal_displacements[k+1]-local_primal_displacements[k];
2884           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2885           for (j=0;j<ins_local_primal_size;j++){
2886             ins_local_primal_indices[j]=(PetscInt)replicated_local_primal_indices[offset+j];
2887           }
2888           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
2889           ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
2890           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
2891         }
2892       }
2893       ins_local_primal_indices = 0;
2894       ins_coarse_mat_vals = 0;
2895     ierr = MatAssemblyBegin(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2896     ierr = MatAssemblyEnd(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2897   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
2898   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
2899   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
2900   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
2901   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
2902   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
2903   ierr = PetscFree(local_primal_sizes);CHKERRQ(ierr);
2904   ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
2905   ierr = PetscFree(replicated_local_primal_indices);CHKERRQ(ierr);
2906   }
2907 
2908 
2909   /* propagate symmetry info */
2910   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2911 
2912   /* create local to global scatters */
2913   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2914   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2915 
2916   /* free memory no longer needed */
2917   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2918   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2919   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2920   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2921 
2922   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2923 
2924   /* Compute coarse null space */
2925   CoarseNullSpace = 0;
2926   if (pcbddc->NullSpace) {
2927     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2928   }
2929 
2930   /* KSP for coarse problem */
2931   {
2932     PetscBool isbddc=PETSC_FALSE;
2933     PetscInt  max_it;
2934     ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2935     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2936     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2937     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2938     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2939     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2940     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2941     /* Allow user's customization */
2942     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
2943     /* Set Up PC for coarse problem BDDC */
2944     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2945       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2946       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2947       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2948       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
2949       if (CoarseNullSpace) {
2950         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2951       }
2952       if (pcbddc->dbg_flag) {
2953         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"----------------Level %d -> Setting up next level---------------\n",pcbddc->current_level);CHKERRQ(ierr);
2954         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2955       }
2956     } else {
2957       if (CoarseNullSpace) {
2958         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2959       }
2960     }
2961     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2962     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2963 
2964     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2965     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2966     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2967     if (max_it == 1) {
2968       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2969       if (isbddc) {
2970         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
2971       }
2972     }
2973   }
2974   /* Check coarse problem if requested */
2975   if (pcbddc->dbg_flag) {
2976     KSP check_ksp;
2977     PC  check_pc;
2978     Vec check_vec;
2979     PetscInt its;
2980     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
2981     KSPType check_ksp_type;
2982 
2983     /* Create ksp object suitable for extreme eigenvalues' estimation */
2984     ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&check_ksp);CHKERRQ(ierr);
2985     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2986     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
2987     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2988       if (issym) check_ksp_type = KSPCG;
2989       else check_ksp_type = KSPGMRES;
2990       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
2991     } else {
2992       check_ksp_type = KSPPREONLY;
2993     }
2994     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
2995     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
2996     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
2997     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
2998     /* create random vec */
2999     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3000     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3001     if (CoarseNullSpace) {
3002       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3003     }
3004     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3005     /* solve coarse problem */
3006     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3007     if (CoarseNullSpace) {
3008       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3009     }
3010     /* check coarse problem residual error */
3011     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3012     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3013     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3014     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3015     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3016     /* get eigenvalue estimation if inexact */
3017     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3018       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3019       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3020       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",its,check_ksp_type);CHKERRQ(ierr);
3021       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3022     }
3023     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3024     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3025     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3026   }
3027   if (pcbddc->dbg_flag) {
3028     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3029   }
3030   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3031   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3032   PetscFunctionReturn(0);
3033 }
3034 
3035 #undef __FUNCT__
3036 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3037 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3038 {
3039   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3040   PC_IS*         pcis = (PC_IS*)pc->data;
3041   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3042   PetscInt       i,j,coarse_size;
3043   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3044   PetscErrorCode ierr;
3045 
3046   PetscFunctionBegin;
3047   /* get indices in local ordering for vertices and constraints */
3048   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3049   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3050   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3051   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3052   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3053   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3054   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3055 
3056   /* Compute global number of coarse dofs */
3057   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3058 
3059   /* check numbering */
3060   if (pcbddc->dbg_flag) {
3061     PetscScalar coarsesum,*array;
3062 
3063     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3064     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3065     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3066     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3067     for (i=0;i<pcbddc->local_primal_size;i++) {
3068       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3069     }
3070     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3071     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3072     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3073     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3074     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3075     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3076     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3077     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3078     for (i=0;i<pcis->n;i++) {
3079       if (array[i] == 1.0) {
3080         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3081       }
3082     }
3083     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3084     for (i=0;i<pcis->n;i++) {
3085       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3086     }
3087     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3088     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3089     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3090     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3091     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3092     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3093     if (pcbddc->dbg_flag > 1) {
3094       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3095       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3096       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3097       for (i=0;i<pcbddc->local_primal_size;i++) {
3098         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3099       }
3100       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3101     }
3102     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3103   }
3104   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3105   /* get back data */
3106   *coarse_size_n = coarse_size;
3107   *local_primal_indices_n = local_primal_indices;
3108   PetscFunctionReturn(0);
3109 }
3110 
3111