xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision a57a6d2fb09bfeb8ee22f16f820c94ae990637d6)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCSetLevel"
47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
48 {
49   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   pcbddc->current_level=level;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCBDDCResetCustomization"
58 PetscErrorCode PCBDDCResetCustomization(PC pc)
59 {
60   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
61   PetscInt       i;
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
66   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
67   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
68   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
70   for (i=0;i<pcbddc->n_ISForDofs;i++) {
71     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "PCBDDCResetTopography"
79 PetscErrorCode PCBDDCResetTopography(PC pc)
80 {
81   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
86   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
87   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCBDDCResetSolvers"
93 PetscErrorCode PCBDDCResetSolvers(PC pc)
94 {
95   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
101   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
103   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
104   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
105   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
106   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
107   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
110   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
113   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
114   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
115   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
116   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "PCBDDCCreateWorkVectors"
122 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
123 {
124   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
125   PC_IS          *pcis = (PC_IS*)pc->data;
126   VecType        impVecType;
127   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
128   PetscErrorCode ierr;
129 
130   PetscFunctionBegin;
131   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
132   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
133   local_primal_size = n_constraints+n_vertices;
134   n_R = pcis->n-n_vertices;
135   /* local work vectors */
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
138   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
139   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
140   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
141   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
142   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
143   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
144   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
145   if (n_constraints) {
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
155 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
156 {
157   PetscErrorCode         ierr;
158   /* pointers to pcis and pcbddc */
159   PC_IS*                 pcis = (PC_IS*)pc->data;
160   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
161   /* submatrices of local problem */
162   Mat                    A_RV,A_VR,A_VV;
163   /* working matrices */
164   Mat                    M1,M2,M3,C_CR;
165   /* working vectors */
166   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
167   /* additional working stuff */
168   IS                     is_aux;
169   ISLocalToGlobalMapping BtoNmap;
170   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
171   const PetscScalar      *array,*row_cmat_values;
172   const PetscInt         *row_cmat_indices,*idx_R_local;
173   PetscInt               *vertices,*idx_V_B,*auxindices;
174   PetscInt               n_vertices,n_constraints,size_of_constraint;
175   PetscInt               i,j,n_R,n_D,n_B;
176   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
177   /* Vector and matrix types */
178   VecType                impVecType;
179   MatType                impMatType;
180   /* some shortcuts to scalars */
181   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
182   /* for debugging purposes */
183   PetscReal              *coarsefunctions_errors,*constraints_errors;
184 
185   PetscFunctionBegin;
186   /* get number of vertices and their local indices */
187   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
188   n_constraints = pcbddc->local_primal_size-n_vertices;
189   /* Set Non-overlapping dimensions */
190   n_B = pcis->n_B; n_D = pcis->n - n_B;
191   n_R = pcis->n-n_vertices;
192 
193   /* Set types for local objects needed by BDDC precondtioner */
194   impMatType = MATSEQDENSE;
195   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
196 
197   /* Allocating some extra storage just to be safe */
198   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
199   for (i=0;i<pcis->n;i++) auxindices[i]=i;
200 
201   /* vertices in boundary numbering */
202   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
203   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
204   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
205   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
206   if (i != n_vertices) {
207     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
208   }
209 
210   /* some work vectors on vertices and/or constraints */
211   if (n_vertices) {
212     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
213     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
214     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
215     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
216   }
217   if (n_constraints) {
218     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
219     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
220   }
221 
222   /* Precompute stuffs needed for preprocessing and application of BDDC*/
223   if (n_constraints) {
224     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
225     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
226     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
227     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
228 
229     /* Extract constraints on R nodes: C_{CR}  */
230     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
231     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
232     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
233 
234     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
235     for (i=0;i<n_constraints;i++) {
236       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
237       /* Get row of constraint matrix in R numbering */
238       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
239       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
240       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
241       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
242       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
243       /* Solve for row of constraint matrix in R numbering */
244       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
245       /* Set values in local_auxmat2 */
246       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
247       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
248       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
249     }
250     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
252     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
253 
254     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
255     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
256     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
257     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
258     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
259     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
260     ierr = MatSetUp(M1);CHKERRQ(ierr);
261     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
262     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
263     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
264     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
265     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
266     ierr = MatDestroy(&M2);CHKERRQ(ierr);
267     ierr = MatDestroy(&M3);CHKERRQ(ierr);
268     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
269     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
270   }
271 
272   /* Get submatrices from subdomain matrix */
273   if (n_vertices) {
274     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
275     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
277     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
278     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
279   }
280 
281   /* Matrix of coarse basis functions (local) */
282   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
283   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
284   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
285   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
286   if (pcbddc->switch_static || pcbddc->dbg_flag) {
287     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
288     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
289     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
290     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
291   }
292 
293   if (pcbddc->dbg_flag) {
294     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
295     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
296     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
297   }
298   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
299   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
300 
301   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
302 
303   /* vertices */
304   for (i=0;i<n_vertices;i++) {
305     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
306     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
307     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
308     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
309     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
310     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
311     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
312     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
313     if (n_constraints) {
314       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
315       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
316       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
317     }
318     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
319     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
320 
321     /* Set values in coarse basis function and subdomain part of coarse_mat */
322     /* coarse basis functions */
323     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
324     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
325     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
326     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
327     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
329     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
330     if (pcbddc->switch_static || pcbddc->dbg_flag) {
331       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
333       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
334       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
335       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
336     }
337     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
338     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
339     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
340     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
341     if (n_constraints) {
342       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
343       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
345     }
346 
347     /* check */
348     if (pcbddc->dbg_flag) {
349       /* assemble subdomain vector on local nodes */
350       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
351       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
352       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
354       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
355       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
357       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
358       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
359       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
360       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
361       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
362       if (n_constraints) {
363         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
364         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
365         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
366       }
367       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
368       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
369       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
370       /* check saddle point solution */
371       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
372       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
373       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
374       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
375       /* shift by the identity matrix */
376       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
377       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
380     }
381   }
382 
383   /* constraints */
384   for (i=0;i<n_constraints;i++) {
385     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
386     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
387     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
388     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
389     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
390     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
391     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
392     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
393     if (n_vertices) {
394       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
395     }
396     /* Set values in coarse basis function and subdomain part of coarse_mat */
397     /* coarse basis functions */
398     j = i+n_vertices; /* don't touch this! */
399     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
400     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
404     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
405     if (pcbddc->switch_static || pcbddc->dbg_flag) {
406       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
409       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
410       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
411     }
412     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
413     if (n_vertices) {
414       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
417     }
418     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
420     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
421 
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
430       /* assemble subdomain vector of lagrange multipliers */
431       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
432       if (n_vertices) {
433         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       }
437       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
438       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
439       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
440       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
443       /* check saddle point solution */
444       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
447       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
448       /* shift by the identity matrix */
449       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
450       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
453     }
454   }
455   /* call assembling routines for local coarse basis */
456   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   if (pcbddc->switch_static || pcbddc->dbg_flag) {
459     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
461   }
462 
463   /* compute other basis functions for non-symmetric problems */
464   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
465   if (!setsym || (setsym && !issym)) {
466     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
467     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
468     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
469     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
470     if (pcbddc->switch_static || pcbddc->dbg_flag) {
471       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
472       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
473       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
474       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
475     }
476     for (i=0;i<pcbddc->local_primal_size;i++) {
477       if (n_constraints) {
478         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
479         for (j=0;j<n_constraints;j++) {
480           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
481         }
482         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
483         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
484       }
485       if (i<n_vertices) {
486         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
487         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
488         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
489         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
490         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
491         if (n_constraints) {
492           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
493         }
494       } else {
495         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
496       }
497       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
498       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
499       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
500       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
502       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
503       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
504       if (i<n_vertices) {
505         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
506       }
507       if (pcbddc->switch_static || pcbddc->dbg_flag) {
508         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
511         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
513       }
514 
515       if (pcbddc->dbg_flag) {
516         /* assemble subdomain vector on nodes */
517         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
518         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
519         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
520         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
521         if (i<n_vertices) {
522           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
523         }
524         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
525         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
526         /* assemble subdomain vector of lagrange multipliers */
527         for (j=0;j<pcbddc->local_primal_size;j++) {
528           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
529         }
530         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
531         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
532         /* check saddle point solution */
533         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
534         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
535         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
536         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
537         /* shift by the identity matrix */
538         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
542       }
543     }
544     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     if (pcbddc->switch_static || pcbddc->dbg_flag) {
547       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549     }
550   }
551   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
552   /* Checking coarse_sub_mat and coarse basis functios */
553   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
554   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
555   if (pcbddc->dbg_flag) {
556     Mat         coarse_sub_mat;
557     Mat         AUXMAT,TM1,TM2,TM3,TM4;
558     Mat         coarse_phi_D,coarse_phi_B;
559     Mat         coarse_psi_D,coarse_psi_B;
560     Mat         A_II,A_BB,A_IB,A_BI;
561     MatType     checkmattype=MATSEQAIJ;
562     PetscReal   real_value;
563 
564     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
565     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
566     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
567     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
568     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
569     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
570     if (pcbddc->coarse_psi_B) {
571       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
572       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
573     }
574     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
575 
576     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
577     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
578     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
579     if (pcbddc->coarse_psi_B) {
580       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
581       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
583       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592     } else {
593       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
594       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
595       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601     }
602     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
604     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
605     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
606     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
607     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
608     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
612     for (i=0;i<pcbddc->local_primal_size;i++) {
613       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
614     }
615     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
616     for (i=0;i<pcbddc->local_primal_size;i++) {
617       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
618     }
619     if (pcbddc->coarse_psi_B) {
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
621       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
622         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
623       }
624       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
625       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
626         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
627       }
628     }
629     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
630     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
631     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
632     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
633     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
634     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
635     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
636     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
637     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
638     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
639     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
640     if (pcbddc->coarse_psi_B) {
641       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
642       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
643     }
644     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
645     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
646     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
647     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
648   }
649   /* free memory */
650   if (n_vertices) {
651     ierr = PetscFree(vertices);CHKERRQ(ierr);
652     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
653     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
654     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
655     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
656     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
657   }
658   if (n_constraints) {
659     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
660     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
661     ierr = MatDestroy(&M1);CHKERRQ(ierr);
662     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
663   }
664   ierr = PetscFree(auxindices);CHKERRQ(ierr);
665   /* get back data */
666   *coarse_submat_vals_n = coarse_submat_vals;
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
672 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
673 {
674   PC_IS*            pcis = (PC_IS*)(pc->data);
675   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
676   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
677   /* manage repeated solves */
678   MatReuse          reuse;
679   MatStructure      matstruct;
680   PetscErrorCode    ierr;
681 
682   PetscFunctionBegin;
683   /* get mat flags */
684   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
685   reuse = MAT_INITIAL_MATRIX;
686   if (pc->setupcalled) {
687     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
688     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
689     if (matstruct == SAME_NONZERO_PATTERN) {
690       reuse = MAT_REUSE_MATRIX;
691     } else {
692       reuse = MAT_INITIAL_MATRIX;
693     }
694   }
695   if (reuse == MAT_INITIAL_MATRIX) {
696     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
698     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
699     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
700     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
701   }
702 
703   /* transform local matrices if needed */
704   if (pcbddc->use_change_of_basis) {
705     Mat         change_mat_all;
706     PetscScalar *row_cmat_values;
707     PetscInt    *row_cmat_indices;
708     PetscInt    *nnz,*is_indices,*temp_indices;
709     PetscInt    i,j,k,n_D,n_B;
710 
711     /* Get Non-overlapping dimensions */
712     n_B = pcis->n_B;
713     n_D = pcis->n-n_B;
714 
715     /* compute nonzero structure of change of basis on all local nodes */
716     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
717     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
718     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
719     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
720     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
721     k=1;
722     for (i=0;i<n_B;i++) {
723       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
724       nnz[is_indices[i]]=j;
725       if (k < j) k = j;
726       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
727     }
728     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     /* assemble change of basis matrix on the whole set of local dofs */
730     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
731     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
732     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
733     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
734     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
735     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
736     for (i=0;i<n_D;i++) {
737       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
738     }
739     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
743       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
744       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
746     }
747     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
748     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
749     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
750     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
751     if (i==1) {
752       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
753     } else {
754       Mat work_mat;
755       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
756       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
757       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
758     }
759     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
760     ierr = PetscFree(nnz);CHKERRQ(ierr);
761     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
762   } else {
763     /* without change of basis, the local matrix is unchanged */
764     if (!pcbddc->local_mat) {
765       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
766       pcbddc->local_mat = matis->A;
767     }
768   }
769 
770   /* get submatrices */
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
772   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
773   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
774   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
780 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
781 {
782   PC_IS*         pcis = (PC_IS*)(pc->data);
783   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
784   IS             is_aux1,is_aux2;
785   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
786   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
787   PetscBT        bitmask;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* Set Non-overlapping dimensions */
792   n_B = pcis->n_B; n_D = pcis->n - n_B;
793   /* get vertex indices from constraint matrix */
794   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
795   /* Set number of constraints */
796   n_constraints = pcbddc->local_primal_size-n_vertices;
797   /* create auxiliary bitmask */
798   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
799   for (i=0;i<n_vertices;i++) {
800     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
801   }
802   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
803   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
804   for (i=0, n_R=0; i<pcis->n; i++) {
805     if (!PetscBTLookup(bitmask,i)) {
806       idx_R_local[n_R] = i;
807       n_R++;
808     }
809   }
810   ierr = PetscFree(vertices);CHKERRQ(ierr);
811   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
812 
813   /* print some info if requested */
814   if (pcbddc->dbg_flag) {
815     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
816     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
817     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
818     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
820     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
821     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
822   }
823 
824   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
825   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
826   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
827   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
828   for (i=0; i<n_D; i++) {
829     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
830   }
831   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
832   for (i=0, j=0; i<n_R; i++) {
833     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
834       aux_array1[j++] = i;
835     }
836   }
837   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
838   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
839   for (i=0, j=0; i<n_B; i++) {
840     if (!PetscBTLookup(bitmask,is_indices[i])) {
841       aux_array2[j++] = i;
842     }
843   }
844   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
845   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
846   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
847   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
849 
850   if (pcbddc->switch_static || pcbddc->dbg_flag) {
851     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
852     for (i=0, j=0; i<n_R; i++) {
853       if (PetscBTLookup(bitmask,idx_R_local[i])) {
854         aux_array1[j++] = i;
855       }
856     }
857     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
858     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
859     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
860   }
861   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
867 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
868 {
869   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
870 
871   PetscFunctionBegin;
872   pcbddc->use_exact_dirichlet=use;
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
878 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
879 {
880   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
881   PC_IS          *pcis = (PC_IS*)pc->data;
882   PC             pc_temp;
883   Mat            A_RR;
884   Vec            vec1,vec2,vec3;
885   MatStructure   matstruct;
886   PetscScalar    m_one = -1.0;
887   PetscReal      value;
888   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
889   PetscErrorCode ierr;
890 
891   PetscFunctionBegin;
892   /* Creating PC contexts for local Dirichlet and Neumann problems */
893   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
894 
895   /* DIRICHLET PROBLEM */
896   /* Matrix for Dirichlet problem is pcis->A_II */
897   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
898   if (!pcbddc->ksp_D) { /* create object if not yet build */
899     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
900     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
901     /* default */
902     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
903     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
904     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
905     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
906     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
907   }
908   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
909   /* Allow user's customization */
910   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
911   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
912   if (!n_D) {
913     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
914     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
915   }
916   /* Set Up KSP for Dirichlet problem of BDDC */
917   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
918   /* set ksp_D into pcis data */
919   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
920   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
921   pcis->ksp_D = pcbddc->ksp_D;
922 
923   /* NEUMANN PROBLEM */
924   /* Matrix for Neumann problem is A_RR -> we need to create it */
925   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
926   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
927   if (!pcbddc->ksp_R) { /* create object if not yet build */
928     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
929     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
930     /* default */
931     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
932     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
933     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
934     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
935     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
936   }
937   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
938   /* Allow user's customization */
939   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
940   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
941   if (!n_R) {
942     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
943     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
944   }
945   /* Set Up KSP for Neumann problem of BDDC */
946   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
947 
948   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
949 
950   /* Dirichlet */
951   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
952   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
953   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
954   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
955   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
956   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
957   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
958   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
959   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
960   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
961   /* need to be adapted? */
962   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
963   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
964   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
965   /* print info */
966   if (pcbddc->dbg_flag) {
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
969     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
970     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
971     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
972   }
973   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
974     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
975   }
976 
977   /* Neumann */
978   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
979   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
980   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
981   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
982   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
983   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
984   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
985   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
986   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
987   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
988   /* need to be adapted? */
989   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
990   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
991   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
992   /* print info */
993   if (pcbddc->dbg_flag) {
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
995     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
996   }
997   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
998     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
999   }
1000 
1001   /* free Neumann problem's matrix */
1002   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1008 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1009 {
1010   PetscErrorCode ierr;
1011   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1012 
1013   PetscFunctionBegin;
1014   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1015   if (pcbddc->local_auxmat1) {
1016     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1017     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1018   }
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1024 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1025 {
1026   PetscErrorCode ierr;
1027   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1028   PC_IS*            pcis = (PC_IS*)  (pc->data);
1029   const PetscScalar zero = 0.0;
1030 
1031   PetscFunctionBegin;
1032   /* Application of PHI^T (or PSI^T)  */
1033   if (pcbddc->coarse_psi_B) {
1034     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1035     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1036   } else {
1037     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1038     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1039   }
1040   /* Scatter data of coarse_rhs */
1041   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1042   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043 
1044   /* Local solution on R nodes */
1045   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1046   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1048   if (pcbddc->switch_static) {
1049     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   }
1052   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1053   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1054   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056   if (pcbddc->switch_static) {
1057     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1059   }
1060 
1061   /* Coarse solution */
1062   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1064     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1065   }
1066   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068 
1069   /* Sum contributions from two levels */
1070   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1071   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1077 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1078 {
1079   PetscErrorCode ierr;
1080   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1081 
1082   PetscFunctionBegin;
1083   switch (pcbddc->coarse_communications_type) {
1084     case SCATTERS_BDDC:
1085       ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1086       break;
1087   }
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1093 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1094 {
1095   PetscErrorCode ierr;
1096   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1097 
1098   PetscFunctionBegin;
1099   switch (pcbddc->coarse_communications_type) {
1100     case SCATTERS_BDDC:
1101       ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1102       break;
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 /* uncomment for testing purposes */
1108 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1109 #undef __FUNCT__
1110 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1111 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1112 {
1113   PetscErrorCode    ierr;
1114   PC_IS*            pcis = (PC_IS*)(pc->data);
1115   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1116   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1117   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1118   MatType           impMatType=MATSEQAIJ;
1119   /* one and zero */
1120   PetscScalar       one=1.0,zero=0.0;
1121   /* space to store constraints and their local indices */
1122   PetscScalar       *temp_quadrature_constraint;
1123   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1124   /* iterators */
1125   PetscInt          i,j,k,total_counts,temp_start_ptr;
1126   /* stuff to store connected components stored in pcbddc->mat_graph */
1127   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1128   PetscInt          n_ISForFaces,n_ISForEdges;
1129   /* near null space stuff */
1130   MatNullSpace      nearnullsp;
1131   const Vec         *nearnullvecs;
1132   Vec               *localnearnullsp;
1133   PetscBool         nnsp_has_cnst;
1134   PetscInt          nnsp_size;
1135   PetscScalar       *array;
1136   /* BLAS integers */
1137   PetscBLASInt      lwork,lierr;
1138   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1139   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1140   /* LAPACK working arrays for SVD or POD */
1141   PetscBool         skip_lapack;
1142   PetscScalar       *work;
1143   PetscReal         *singular_vals;
1144 #if defined(PETSC_USE_COMPLEX)
1145   PetscReal         *rwork;
1146 #endif
1147 #if defined(PETSC_MISSING_LAPACK_GESVD)
1148   PetscBLASInt      Blas_one_2=1;
1149   PetscScalar       *temp_basis,*correlation_mat;
1150 #else
1151   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1152   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1153 #endif
1154   /* change of basis */
1155   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1156   PetscBool         boolforchange;
1157   PetscBT           touched,change_basis;
1158   /* auxiliary stuff */
1159   PetscInt          *nnz,*is_indices,*local_to_B;
1160   /* some quantities */
1161   PetscInt          n_vertices,total_primal_vertices;
1162   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1163 
1164 
1165   PetscFunctionBegin;
1166   /* Get index sets for faces, edges and vertices from graph */
1167   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1168     pcbddc->use_vertices = PETSC_TRUE;
1169   }
1170   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1171   /* print some info */
1172   if (pcbddc->dbg_flag) {
1173     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1174     i = 0;
1175     if (ISForVertices) {
1176       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1177     }
1178     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1179     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1180     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182   }
1183   /* check if near null space is attached to global mat */
1184   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1185   if (nearnullsp) {
1186     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1187   } else { /* if near null space is not provided BDDC uses constants by default */
1188     nnsp_size = 0;
1189     nnsp_has_cnst = PETSC_TRUE;
1190   }
1191   /* get max number of constraints on a single cc */
1192   max_constraints = nnsp_size;
1193   if (nnsp_has_cnst) max_constraints++;
1194 
1195   /*
1196        Evaluate maximum storage size needed by the procedure
1197        - temp_indices will contain start index of each constraint stored as follows
1198        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1199        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1200        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1201                                                                                                                                                          */
1202   total_counts = n_ISForFaces+n_ISForEdges;
1203   total_counts *= max_constraints;
1204   n_vertices = 0;
1205   if (ISForVertices) {
1206     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1207   }
1208   total_counts += n_vertices;
1209   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1210   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1211   total_counts = 0;
1212   max_size_of_constraint = 0;
1213   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1214     if (i<n_ISForEdges) {
1215       used_IS = &ISForEdges[i];
1216     } else {
1217       used_IS = &ISForFaces[i-n_ISForEdges];
1218     }
1219     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1220     total_counts += j;
1221     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1222   }
1223   total_counts *= max_constraints;
1224   total_counts += n_vertices;
1225   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1226   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1227   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1228   /* local to boundary numbering */
1229   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1230   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1231   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1232   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1233   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1234   /* get local part of global near null space vectors */
1235   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1236   for (k=0;k<nnsp_size;k++) {
1237     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1238     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240   }
1241 
1242   /* whether or not to skip lapack calls */
1243   skip_lapack = PETSC_TRUE;
1244   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1245 
1246   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1247   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1248     PetscScalar temp_work;
1249 #if defined(PETSC_MISSING_LAPACK_GESVD)
1250     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1251     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1252     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1253     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1254 #if defined(PETSC_USE_COMPLEX)
1255     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1256 #endif
1257     /* now we evaluate the optimal workspace using query with lwork=-1 */
1258     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1259     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1260     lwork = -1;
1261     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1262 #if !defined(PETSC_USE_COMPLEX)
1263     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1264 #else
1265     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1266 #endif
1267     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1268     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1269 #else /* on missing GESVD */
1270     /* SVD */
1271     PetscInt max_n,min_n;
1272     max_n = max_size_of_constraint;
1273     min_n = max_constraints;
1274     if (max_size_of_constraint < max_constraints) {
1275       min_n = max_size_of_constraint;
1276       max_n = max_constraints;
1277     }
1278     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1279 #if defined(PETSC_USE_COMPLEX)
1280     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1281 #endif
1282     /* now we evaluate the optimal workspace using query with lwork=-1 */
1283     lwork = -1;
1284     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1285     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1286     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1287     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1288 #if !defined(PETSC_USE_COMPLEX)
1289     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1290 #else
1291     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1292 #endif
1293     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1294     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1295 #endif /* on missing GESVD */
1296     /* Allocate optimal workspace */
1297     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1298     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1299   }
1300   /* Now we can loop on constraining sets */
1301   total_counts = 0;
1302   temp_indices[0] = 0;
1303   /* vertices */
1304   if (ISForVertices) {
1305     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1306     if (nnsp_has_cnst) { /* consider all vertices */
1307       for (i=0;i<n_vertices;i++) {
1308         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1309         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1310         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1311         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1312         total_counts++;
1313       }
1314     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1315       PetscBool used_vertex;
1316       for (i=0;i<n_vertices;i++) {
1317         used_vertex = PETSC_FALSE;
1318         k = 0;
1319         while (!used_vertex && k<nnsp_size) {
1320           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1321           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1322             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1323             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1324             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1325             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1326             total_counts++;
1327             used_vertex = PETSC_TRUE;
1328           }
1329           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1330           k++;
1331         }
1332       }
1333     }
1334     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1335     n_vertices = total_counts;
1336   }
1337 
1338   /* edges and faces */
1339   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1340     if (i<n_ISForEdges) {
1341       used_IS = &ISForEdges[i];
1342       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1343     } else {
1344       used_IS = &ISForFaces[i-n_ISForEdges];
1345       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1346     }
1347     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1348     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1349     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1350     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1351     /* change of basis should not be performed on local periodic nodes */
1352     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1353     if (nnsp_has_cnst) {
1354       PetscScalar quad_value;
1355       temp_constraints++;
1356       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1357       for (j=0;j<size_of_constraint;j++) {
1358         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1359         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1360         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1361       }
1362       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1363       if (boolforchange) {
1364         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1365       }
1366       total_counts++;
1367     }
1368     for (k=0;k<nnsp_size;k++) {
1369       PetscReal real_value;
1370       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1371       for (j=0;j<size_of_constraint;j++) {
1372         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1373         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1374         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1375       }
1376       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1377       /* check if array is null on the connected component */
1378       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1379       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1380       if (real_value > 0.0) { /* keep indices and values */
1381         temp_constraints++;
1382         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1383         if (boolforchange) {
1384           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1385         }
1386         total_counts++;
1387       }
1388     }
1389     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1390     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1391     if (!pcbddc->use_nnsp_true) {
1392       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1393 
1394 #if defined(PETSC_MISSING_LAPACK_GESVD)
1395       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1396          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1397          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1398             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1399             from that computed using LAPACKgesvd
1400          -> This is due to a different computation of eigenvectors in LAPACKheev
1401          -> The quality of the POD-computed basis will be the same */
1402       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1403       /* Store upper triangular part of correlation matrix */
1404       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1405       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1406       for (j=0;j<temp_constraints;j++) {
1407         for (k=0;k<j+1;k++) {
1408           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1409         }
1410       }
1411       /* compute eigenvalues and eigenvectors of correlation matrix */
1412       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1413       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1414 #if !defined(PETSC_USE_COMPLEX)
1415       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1416 #else
1417       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1418 #endif
1419       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1420       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1421       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1422       j=0;
1423       while (j < temp_constraints && singular_vals[j] < tol) j++;
1424       total_counts=total_counts-j;
1425       /* scale and copy POD basis into used quadrature memory */
1426       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1427       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1428       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1429       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1430       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1431       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1432       if (j<temp_constraints) {
1433         PetscInt ii;
1434         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1435         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1436         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1437         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1438         for (k=0;k<temp_constraints-j;k++) {
1439           for (ii=0;ii<size_of_constraint;ii++) {
1440             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1441           }
1442         }
1443       }
1444 #else  /* on missing GESVD */
1445       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1446       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1447       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1448       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1449 #if !defined(PETSC_USE_COMPLEX)
1450       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1451 #else
1452       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1453 #endif
1454       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1455       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1456       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1457       k = temp_constraints;
1458       if (k > size_of_constraint) k = size_of_constraint;
1459       j = 0;
1460       while (j < k && singular_vals[k-j-1] < tol) j++;
1461       total_counts = total_counts-temp_constraints+k-j;
1462 #endif /* on missing GESVD */
1463     }
1464   }
1465   /* free index sets of faces, edges and vertices */
1466   for (i=0;i<n_ISForFaces;i++) {
1467     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1468   }
1469   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1470   for (i=0;i<n_ISForEdges;i++) {
1471     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1472   }
1473   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1474   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1475 
1476   /* free workspace */
1477   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1478     ierr = PetscFree(work);CHKERRQ(ierr);
1479 #if defined(PETSC_USE_COMPLEX)
1480     ierr = PetscFree(rwork);CHKERRQ(ierr);
1481 #endif
1482     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1483 #if defined(PETSC_MISSING_LAPACK_GESVD)
1484     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1485     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1486 #endif
1487   }
1488   for (k=0;k<nnsp_size;k++) {
1489     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1490   }
1491   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1492 
1493   /* set quantities in pcbddc data structure */
1494   /* n_vertices defines the number of subdomain corners in the primal space */
1495   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1496   pcbddc->local_primal_size = total_counts;
1497   pcbddc->n_vertices = n_vertices;
1498   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1499 
1500   /* Create constraint matrix */
1501   /* The constraint matrix is used to compute the l2g map of primal dofs */
1502   /* so we need to set it up properly either with or without change of basis */
1503   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1504   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1505   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1506   /* array to compute a local numbering of constraints : vertices first then constraints */
1507   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1508   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1509   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1510   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1511   /* auxiliary stuff for basis change */
1512   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1513   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1514 
1515   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1516   total_primal_vertices=0;
1517   for (i=0;i<pcbddc->local_primal_size;i++) {
1518     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1519     if (size_of_constraint == 1) {
1520       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1521       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1522       aux_primal_minloc[total_primal_vertices]=0;
1523       total_primal_vertices++;
1524     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1525       PetscInt min_loc,min_index;
1526       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1527       /* find first untouched local node */
1528       k = 0;
1529       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1530       min_index = global_indices[k];
1531       min_loc = k;
1532       /* search the minimum among global nodes already untouched on the cc */
1533       for (k=1;k<size_of_constraint;k++) {
1534         /* there can be more than one constraint on a single connected component */
1535         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1536           min_index = global_indices[k];
1537           min_loc = k;
1538         }
1539       }
1540       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1541       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1542       aux_primal_minloc[total_primal_vertices]=min_loc;
1543       total_primal_vertices++;
1544     }
1545   }
1546   /* free workspace */
1547   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1548   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1549   /* permute indices in order to have a sorted set of vertices */
1550   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1551 
1552   /* nonzero structure of constraint matrix */
1553   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1554   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1555   j=total_primal_vertices;
1556   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1557     if (!PetscBTLookup(change_basis,i)) {
1558       nnz[j]=temp_indices[i+1]-temp_indices[i];
1559       j++;
1560     }
1561   }
1562   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1563   ierr = PetscFree(nnz);CHKERRQ(ierr);
1564   /* set values in constraint matrix */
1565   for (i=0;i<total_primal_vertices;i++) {
1566     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1567   }
1568   total_counts = total_primal_vertices;
1569   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1570     if (!PetscBTLookup(change_basis,i)) {
1571       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1572       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1573       total_counts++;
1574     }
1575   }
1576   /* assembling */
1577   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1578   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1579   /*
1580   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1581   */
1582   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1583   if (pcbddc->use_change_of_basis) {
1584     PetscBool qr_needed = PETSC_FALSE;
1585     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1586     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1587     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1588     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1589     /* work arrays */
1590     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1591     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1592     /* nonzeros per row */
1593     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1594       if (PetscBTLookup(change_basis,i)) {
1595         qr_needed = PETSC_TRUE;
1596         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1597         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1598       }
1599     }
1600     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1601     ierr = PetscFree(nnz);CHKERRQ(ierr);
1602     /* Set initial identity in the matrix */
1603     for (i=0;i<pcis->n_B;i++) {
1604       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1605     }
1606 
1607     /* Now we loop on the constraints which need a change of basis */
1608     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1609     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1610     /* Change of basis matrix T computed via QR decomposition of constraints */
1611     if (qr_needed) {
1612       /* dual and primal dofs on a single cc */
1613       PetscInt     dual_dofs,primal_dofs;
1614       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1615       PetscInt     primal_counter;
1616       /* working stuff for GEQRF */
1617       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1618       PetscBLASInt lqr_work;
1619       /* working stuff for UNGQR */
1620       PetscScalar  *gqr_work,lgqr_work_t;
1621       PetscBLASInt lgqr_work;
1622       /* working stuff for TRTRS */
1623       PetscScalar  *trs_rhs;
1624       PetscBLASInt Blas_NRHS;
1625       /* pointers for values insertion into change of basis matrix */
1626       PetscInt     *start_rows,*start_cols;
1627       PetscScalar  *start_vals;
1628       /* working stuff for values insertion */
1629       PetscBT      is_primal;
1630 
1631       /* space to store Q */
1632       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1633       /* first we issue queries for optimal work */
1634       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1635       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1636       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1637       lqr_work = -1;
1638       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1639       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1640       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1641       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1642       lgqr_work = -1;
1643       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1644       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1645       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1646       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1647       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1648       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1649       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1650       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1651       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1652       /* array to store scaling factors for reflectors */
1653       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1654       /* array to store rhs and solution of triangular solver */
1655       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1656       /* array to store whether a node is primal or not */
1657       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1658       for (i=0;i<total_primal_vertices;i++) {
1659         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1660       }
1661 
1662       /* allocating workspace for check */
1663       if (pcbddc->dbg_flag) {
1664         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1665         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1666         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1667       }
1668 
1669       /* loop on constraints and see whether or not they need a change of basis */
1670       /* -> using implicit ordering contained in temp_indices data */
1671       total_counts = pcbddc->n_vertices;
1672       primal_counter = total_counts;
1673       while (total_counts<pcbddc->local_primal_size) {
1674         primal_dofs = 1;
1675         if (PetscBTLookup(change_basis,total_counts)) {
1676           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1677           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1678             primal_dofs++;
1679           }
1680           /* get constraint info */
1681           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1682           dual_dofs = size_of_constraint-primal_dofs;
1683 
1684           /* copy quadrature constraints for change of basis check */
1685           if (pcbddc->dbg_flag) {
1686             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1687             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1688           }
1689 
1690           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1691           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1692 
1693           /* compute QR decomposition of constraints */
1694           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1695           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1696           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1697           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1698           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1699           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1700           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1701 
1702           /* explictly compute R^-T */
1703           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1704           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1705           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1706           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1709           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1710           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1711           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1712           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1713 
1714           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1715           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1716           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1717           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1718           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1719           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1720           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1721           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1722           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1723 
1724           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1725              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1726              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1727           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1728           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1729           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1730           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1731           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1732           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1733           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1734           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1735           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1736           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1737 
1738           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1739           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1740           /* insert cols for primal dofs */
1741           for (j=0;j<primal_dofs;j++) {
1742             start_vals = &qr_basis[j*size_of_constraint];
1743             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1744             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1745           }
1746           /* insert cols for dual dofs */
1747           for (j=0,k=0;j<dual_dofs;k++) {
1748             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1749               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1750               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1751               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1752               j++;
1753             }
1754           }
1755 
1756           /* check change of basis */
1757           if (pcbddc->dbg_flag) {
1758             PetscInt   ii,jj;
1759             PetscBool valid_qr=PETSC_TRUE;
1760             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1761             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1762             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1763             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1764             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1765             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1766             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1767             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1768             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1769             for (jj=0;jj<size_of_constraint;jj++) {
1770               for (ii=0;ii<primal_dofs;ii++) {
1771                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1772                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1773               }
1774             }
1775             if (!valid_qr) {
1776               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1777               for (jj=0;jj<size_of_constraint;jj++) {
1778                 for (ii=0;ii<primal_dofs;ii++) {
1779                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1780                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1781                   }
1782                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1783                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1784                   }
1785                 }
1786               }
1787             } else {
1788               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1789             }
1790           }
1791           /* increment primal counter */
1792           primal_counter += primal_dofs;
1793         } else {
1794           if (pcbddc->dbg_flag) {
1795             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1796           }
1797         }
1798         /* increment constraint counter total_counts */
1799         total_counts += primal_dofs;
1800       }
1801       if (pcbddc->dbg_flag) {
1802         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1803         ierr = PetscFree(work);CHKERRQ(ierr);
1804       }
1805       /* free workspace */
1806       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1807       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1808       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1809       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1810       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1811       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1812     }
1813     /* assembling */
1814     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1815     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1816     /*
1817     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1818     */
1819   }
1820   /* free workspace */
1821   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1822   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1823   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1824   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1825   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1826   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1827   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1828   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1834 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1835 {
1836   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1837   PC_IS       *pcis = (PC_IS*)pc->data;
1838   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1839   PetscInt    bs,ierr,i,vertex_size;
1840   PetscViewer viewer=pcbddc->dbg_viewer;
1841 
1842   PetscFunctionBegin;
1843   /* Init local Graph struct */
1844   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1845 
1846   /* Check validity of the csr graph passed in by the user */
1847   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1848     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1849   }
1850   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1851   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1852     Mat mat_adj;
1853     const PetscInt *xadj,*adjncy;
1854     PetscBool flg_row=PETSC_TRUE;
1855 
1856     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1857     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1858     if (!flg_row) {
1859       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1860     }
1861     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1862     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1863     if (!flg_row) {
1864       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1865     }
1866     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1867   }
1868 
1869   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1870   vertex_size = 1;
1871   if (!pcbddc->n_ISForDofs) {
1872     IS *custom_ISForDofs;
1873 
1874     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1875     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1876     for (i=0;i<bs;i++) {
1877       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1878     }
1879     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1880     /* remove my references to IS objects */
1881     for (i=0;i<bs;i++) {
1882       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1883     }
1884     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1885   } else { /* mat block size as vertex size (used for elasticity) */
1886     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1887   }
1888 
1889   /* Setup of Graph */
1890   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1891 
1892   /* Graph's connected components analysis */
1893   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1894 
1895   /* print some info to stdout */
1896   if (pcbddc->dbg_flag) {
1897     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1904 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1905 {
1906   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1907   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   n = 0;
1912   vertices = 0;
1913   if (pcbddc->ConstraintMatrix) {
1914     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1915     for (i=0;i<local_primal_size;i++) {
1916       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1917       if (size_of_constraint == 1) n++;
1918       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1919     }
1920     if (vertices_idx) {
1921       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1922       n = 0;
1923       for (i=0;i<local_primal_size;i++) {
1924         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1925         if (size_of_constraint == 1) {
1926           vertices[n++]=row_cmat_indices[0];
1927         }
1928         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1929       }
1930     }
1931   }
1932   *n_vertices = n;
1933   if (vertices_idx) *vertices_idx = vertices;
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1939 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1940 {
1941   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1942   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1943   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1944   PetscBT        touched;
1945   PetscErrorCode ierr;
1946 
1947     /* This function assumes that the number of local constraints per connected component
1948        is not greater than the number of nodes defined for the connected component
1949        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1950   PetscFunctionBegin;
1951   n = 0;
1952   constraints_index = 0;
1953   if (pcbddc->ConstraintMatrix) {
1954     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1955     max_size_of_constraint = 0;
1956     for (i=0;i<local_primal_size;i++) {
1957       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1958       if (size_of_constraint > 1) {
1959         n++;
1960       }
1961       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1962       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1963     }
1964     if (constraints_idx) {
1965       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1966       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1967       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1968       n = 0;
1969       for (i=0;i<local_primal_size;i++) {
1970         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1971         if (size_of_constraint > 1) {
1972           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1973           /* find first untouched local node */
1974           j = 0;
1975           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1976           min_index = row_cmat_global_indices[j];
1977           min_loc = j;
1978           /* search the minimum among nodes not yet touched on the connected component
1979              since there can be more than one constraint on a single cc */
1980           for (j=1;j<size_of_constraint;j++) {
1981             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1982               min_index = row_cmat_global_indices[j];
1983               min_loc = j;
1984             }
1985           }
1986           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1987           constraints_index[n++] = row_cmat_indices[min_loc];
1988         }
1989         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1990       }
1991       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1992       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1993     }
1994   }
1995   *n_constraints = n;
1996   if (constraints_idx) *constraints_idx = constraints_index;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 /* the next two functions has been adapted from pcis.c */
2001 #undef __FUNCT__
2002 #define __FUNCT__ "PCBDDCApplySchur"
2003 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2004 {
2005   PetscErrorCode ierr;
2006   PC_IS          *pcis = (PC_IS*)(pc->data);
2007 
2008   PetscFunctionBegin;
2009   if (!vec2_B) { vec2_B = v; }
2010   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2011   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2012   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2013   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2014   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2020 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2021 {
2022   PetscErrorCode ierr;
2023   PC_IS          *pcis = (PC_IS*)(pc->data);
2024 
2025   PetscFunctionBegin;
2026   if (!vec2_B) { vec2_B = v; }
2027   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2028   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2029   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2030   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2031   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PCBDDCSubsetNumbering"
2037 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2038 {
2039   Vec            local_vec,global_vec;
2040   IS             seqis,paris;
2041   VecScatter     scatter_ctx;
2042   PetscScalar    *array;
2043   PetscInt       *temp_global_dofs;
2044   PetscScalar    globalsum;
2045   PetscInt       i,j,s;
2046   PetscInt       nlocals,first_index,old_index,max_local;
2047   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2048   PetscMPIInt    *dof_sizes,*dof_displs;
2049   PetscBool      first_found;
2050   PetscErrorCode ierr;
2051 
2052   PetscFunctionBegin;
2053   /* mpi buffers */
2054   MPI_Comm_size(comm,&size_prec_comm);
2055   MPI_Comm_rank(comm,&rank_prec_comm);
2056   j = ( !rank_prec_comm ? size_prec_comm : 0);
2057   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2058   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2059   /* get maximum size of subset */
2060   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2061   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2062   max_local = 0;
2063   if (n_local_dofs) {
2064     max_local = temp_global_dofs[0];
2065     for (i=1;i<n_local_dofs;i++) {
2066       if (max_local < temp_global_dofs[i] ) {
2067         max_local = temp_global_dofs[i];
2068       }
2069     }
2070   }
2071   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2072   max_global++;
2073   max_local = 0;
2074   if (n_local_dofs) {
2075     max_local = local_dofs[0];
2076     for (i=1;i<n_local_dofs;i++) {
2077       if (max_local < local_dofs[i] ) {
2078         max_local = local_dofs[i];
2079       }
2080     }
2081   }
2082   max_local++;
2083   /* allocate workspace */
2084   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2085   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2086   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2087   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2088   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2089   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2090   /* create scatter */
2091   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2092   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2093   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2094   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2095   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2096   /* init array */
2097   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2098   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2099   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2100   if (local_dofs_mult) {
2101     for (i=0;i<n_local_dofs;i++) {
2102       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2103     }
2104   } else {
2105     for (i=0;i<n_local_dofs;i++) {
2106       array[local_dofs[i]]=1.0;
2107     }
2108   }
2109   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2110   /* scatter into global vec and get total number of global dofs */
2111   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2112   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2114   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2115   /* Fill global_vec with cumulative function for global numbering */
2116   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2117   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2118   nlocals = 0;
2119   first_index = -1;
2120   first_found = PETSC_FALSE;
2121   for (i=0;i<s;i++) {
2122     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2123       first_found = PETSC_TRUE;
2124       first_index = i;
2125     }
2126     nlocals += (PetscInt)PetscRealPart(array[i]);
2127   }
2128   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2129   if (!rank_prec_comm) {
2130     dof_displs[0]=0;
2131     for (i=1;i<size_prec_comm;i++) {
2132       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2133     }
2134   }
2135   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2136   if (first_found) {
2137     array[first_index] += (PetscScalar)nlocals;
2138     old_index = first_index;
2139     for (i=first_index+1;i<s;i++) {
2140       if (PetscRealPart(array[i]) > 0.0) {
2141         array[i] += array[old_index];
2142         old_index = i;
2143       }
2144     }
2145   }
2146   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2147   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2148   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2149   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2150   /* get global ordering of local dofs */
2151   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2152   if (local_dofs_mult) {
2153     for (i=0;i<n_local_dofs;i++) {
2154       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2155     }
2156   } else {
2157     for (i=0;i<n_local_dofs;i++) {
2158       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2159     }
2160   }
2161   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2162   /* free workspace */
2163   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2164   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2165   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2166   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2167   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2168   /* return pointer to global ordering of local dofs */
2169   *global_numbering_subset = temp_global_dofs;
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2175 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2176 {
2177   PetscInt       i,j;
2178   PetscScalar    *alphas;
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   /* this implements stabilized Gram-Schmidt */
2183   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2184   for (i=0;i<n;i++) {
2185     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2186     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2187     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2188   }
2189   ierr = PetscFree(alphas);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatConvert_IS_AIJ"
2196 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2197 {
2198   Mat new_mat;
2199   Mat_IS *matis = (Mat_IS*)(mat->data);
2200   /* info on mat */
2201   PetscInt bs,rows,cols;
2202   PetscInt lrows,lcols;
2203   PetscInt local_rows,local_cols;
2204   PetscMPIInt nsubdomains,rank;
2205   /* preallocation */
2206   Vec vec_dnz,vec_onz;
2207   PetscScalar *my_dnz,*my_onz,*array;
2208   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2209   PetscInt  index_row,index_col,owner;
2210   PetscInt  *local_indices,*global_indices;
2211   /* work */
2212   PetscInt i,j;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   /* CHECKS */
2217   /* get info from mat */
2218   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2219   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2220   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2221   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2222   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2223 
2224   /* MAT_INITIAL_MATRIX */
2225   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2226   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2227   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2228   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2229   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2230   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2231 
2232   /*
2233     preallocation
2234   */
2235 
2236   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2237   /*
2238      Some vectors are needed to sum up properly on shared interface dofs.
2239      Note that preallocation is not exact, since it overestimates nonzeros
2240   */
2241 /*
2242   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2243   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2244   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2245   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2246 */
2247   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2248   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2249   /* All processes should compute entire row ownership */
2250   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2251   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2252   for (i=0;i<nsubdomains;i++) {
2253     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2254       row_ownership[j]=i;
2255     }
2256   }
2257   /* map indices of local mat to global values */
2258   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2259   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2260   for (i=0;i<local_rows;i++) local_indices[i]=i;
2261   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2262   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2263 
2264   /* my_dnz and my_onz contains exact contribution to preallocation from each local mat */
2265   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2266   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2267   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2268   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2269   for (i=0;i<local_rows;i++) {
2270     index_row = global_indices[i];
2271     for (j=i;j<local_rows;j++) {
2272       owner = row_ownership[index_row];
2273       index_col = global_indices[j];
2274       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2275         my_dnz[i] += 1.0;
2276       } else { /* offdiag block */
2277         my_onz[i] += 1.0;
2278       }
2279       /* same as before, interchanging rows and cols */
2280       if (i != j) {
2281         owner = row_ownership[index_col];
2282         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2283           my_dnz[j] += 1.0;
2284         } else {
2285           my_onz[j] += 1.0;
2286         }
2287       }
2288     }
2289   }
2290   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2291   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2292   if (local_rows) {
2293     /* this way, preallocation is always sufficient */
2294     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2295     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2296   }
2297   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2298   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2299   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2300   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2301   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2302   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2303   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2304 
2305   /* set computed preallocation in dnz and onz */
2306   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2307   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2308   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2309   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2310   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2311   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2312   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2313   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2314 
2315   /* Resize preallocation if too much overstimated */
2316   for (i=0;i<lrows;i++) {
2317     dnz[i] = PetscMin(dnz[i],lcols);
2318     onz[i] = PetscMin(onz[i],cols-lcols);
2319   }
2320   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2321   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2322 
2323   /*
2324     SET VALUES. Very Basic.
2325   */
2326   for (i=0;i<local_rows;i++) {
2327     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2328     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2329     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2330     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2331     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2332   }
2333   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2334 
2335   /* get back mat */
2336   *M = new_mat;
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 
2341 
2342 /* BDDC requires metis 5.0.1 for multilevel */
2343 #if defined(PETSC_HAVE_METIS)
2344 #include "metis.h"
2345 #define MetisInt    idx_t
2346 #define MetisScalar real_t
2347 #endif
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2351 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2352 {
2353   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
2354   PC_IS     *pcis     = (PC_IS*)pc->data;
2355   MPI_Comm  prec_comm;
2356   MPI_Comm  coarse_comm;
2357   Mat coarse_mat;
2358 
2359   PetscInt  coarse_size;
2360 
2361   MatNullSpace CoarseNullSpace;
2362 
2363   /* common to all choiches */
2364   PetscScalar *temp_coarse_mat_vals;
2365   PetscScalar *ins_coarse_mat_vals;
2366   PetscInt    *ins_local_primal_indices;
2367   PetscMPIInt *localsizes2,*localdispl2;
2368   PetscMPIInt size_prec_comm;
2369   PetscMPIInt rank_prec_comm;
2370   PetscMPIInt active_rank=MPI_PROC_NULL;
2371   PetscMPIInt master_proc=0;
2372   PetscInt    ins_local_primal_size;
2373   /* specific to MULTILEVEL_BDDC */
2374   PetscMPIInt *ranks_recv=0;
2375   PetscMPIInt count_recv=0;
2376   PetscMPIInt rank_coarse_proc_send_to=-1;
2377   PetscMPIInt coarse_color = MPI_UNDEFINED;
2378   ISLocalToGlobalMapping coarse_ISLG;
2379   /* some other variables */
2380   PetscErrorCode ierr;
2381   MatType coarse_mat_type;
2382   PCType  coarse_pc_type;
2383   KSPType coarse_ksp_type;
2384   PC pc_temp;
2385   PetscInt i,j,k;
2386   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
2387   /* verbose output viewer */
2388   PetscViewer viewer=pcbddc->dbg_viewer;
2389   PetscInt    dbg_flag=pcbddc->dbg_flag;
2390 
2391   PetscInt      offset,offset2;
2392   PetscMPIInt   im_active,active_procs;
2393   PetscInt      *dnz;
2394 
2395   PetscBool     setsym,issym=PETSC_FALSE;
2396 
2397   PetscInt      *replicated_local_primal_indices=0,*local_primal_indices=0,*local_primal_displacements=0,*local_primal_sizes=0;
2398   PetscInt      replicated_primal_size=0;
2399 
2400   PetscFunctionBegin;
2401   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2402   ins_local_primal_indices = 0;
2403   ins_coarse_mat_vals      = 0;
2404   localsizes2              = 0;
2405   localdispl2              = 0;
2406   temp_coarse_mat_vals     = 0;
2407   coarse_ISLG              = 0;
2408 
2409   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2410   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2411   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2412 
2413   if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) {
2414     pcbddc->coarse_problem_type = PARALLEL_BDDC;
2415     pcbddc->coarse_communications_type = SCATTERS_BDDC;
2416   }
2417 
2418   /* Assign global numbering to coarse dofs */
2419   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2420 
2421   im_active = 0;
2422   if (pcis->n) im_active = 1;
2423   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr);
2424 
2425   /* adapt coarse problem type */
2426 #if defined(PETSC_HAVE_METIS)
2427   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2428     if (pcbddc->current_level < pcbddc->max_levels) {
2429       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2430         if (dbg_flag) {
2431           ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2432          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2433         }
2434         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2435       }
2436     } else {
2437       if (dbg_flag) {
2438         ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2439         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2440       }
2441       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2442     }
2443   }
2444 #else
2445   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2446 #endif
2447 
2448   if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2449     Mat coarse_mat_is,coarse_submat_dense;
2450     ISLocalToGlobalMapping coarse_islg;
2451     IS  coarse_is;
2452     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2453     ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2454     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2455     ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2456     ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2457     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2458     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2459     ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2460     ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2461     ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2462     /* just for now */
2463     pcbddc->coarse_communications_type = SCATTERS_BDDC;
2464     coarse_mat_type = MATAIJ;
2465     coarse_pc_type  = PCREDUNDANT;
2466     coarse_ksp_type = KSPPREONLY;
2467     coarse_comm = prec_comm;
2468     active_rank = rank_prec_comm;
2469   } else {
2470   /* OLD version from here */
2471 
2472   /* Construct needed data structures for message passing */
2473   j = 0;
2474   if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2475     j = size_prec_comm;
2476   }
2477   ierr = PetscMalloc(j*sizeof(*local_primal_sizes),&local_primal_sizes);CHKERRQ(ierr);
2478   ierr = PetscMalloc(j*sizeof(*local_primal_displacements),&local_primal_displacements);CHKERRQ(ierr);
2479   /* Gather local_primal_size information for all processes  */
2480   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2481     ierr = MPI_Allgather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2482   } else {
2483     ierr = MPI_Gather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
2484   }
2485   replicated_primal_size = 0;
2486   for (i=0; i<j; i++) {
2487     local_primal_displacements[i] = replicated_primal_size ;
2488     replicated_primal_size += local_primal_sizes[i];
2489   }
2490 
2491   switch(pcbddc->coarse_problem_type){
2492 
2493     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
2494     {
2495 #if defined(PETSC_HAVE_METIS)
2496       /* we need additional variables */
2497       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2498       MetisInt    *metis_coarse_subdivision;
2499       MetisInt    options[METIS_NOPTIONS];
2500       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2501       PetscMPIInt procs_jumps_coarse_comm;
2502       PetscMPIInt *coarse_subdivision;
2503       PetscMPIInt *total_count_recv;
2504       PetscMPIInt *total_ranks_recv;
2505       PetscMPIInt *displacements_recv;
2506       PetscMPIInt *my_faces_connectivity;
2507       PetscMPIInt *petsc_faces_adjncy;
2508       MetisInt    *faces_adjncy;
2509       MetisInt    *faces_xadj;
2510       PetscMPIInt *number_of_faces;
2511       PetscMPIInt *faces_displacements;
2512       PetscInt    *array_int;
2513       PetscMPIInt my_faces=0;
2514       PetscMPIInt total_faces=0;
2515       PetscInt    ranks_stretching_ratio;
2516 
2517       /* define some quantities */
2518       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2519       coarse_mat_type = MATIS;
2520       coarse_pc_type  = PCBDDC;
2521       coarse_ksp_type = KSPRICHARDSON;
2522 
2523       /* details of coarse decomposition */
2524       n_subdomains = active_procs;
2525       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2526       ranks_stretching_ratio = size_prec_comm/active_procs;
2527       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2528 
2529 #if 0
2530       PetscMPIInt *old_ranks;
2531       PetscInt    *new_ranks,*jj,*ii;
2532       MatPartitioning mat_part;
2533       IS coarse_new_decomposition,is_numbering;
2534       PetscViewer viewer_test;
2535       MPI_Comm    test_coarse_comm;
2536       PetscMPIInt test_coarse_color;
2537       Mat         mat_adj;
2538       /* Create new communicator for coarse problem splitting the old one */
2539       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2540          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2541       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2542       test_coarse_comm = MPI_COMM_NULL;
2543       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2544       if (im_active) {
2545         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2546         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2547         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2548         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2549         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2550         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2551         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2552         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2553         k = pcis->n_neigh-1;
2554         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2555         ii[0]=0;
2556         ii[1]=k;
2557         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2558         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2559         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2560         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2561         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2562         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2563         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2564         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2565         printf("Setting Nparts %d\n",n_parts);
2566         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2567         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2568         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2569         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2570         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2571         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2572         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2573         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2574         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2575         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2576         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2577         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2578         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2579       }
2580 #endif
2581 
2582       /* build CSR graph of subdomains' connectivity */
2583       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2584       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2585       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2586         for (j=0;j<pcis->n_shared[i];j++){
2587           array_int[ pcis->shared[i][j] ]+=1;
2588         }
2589       }
2590       for (i=1;i<pcis->n_neigh;i++){
2591         for (j=0;j<pcis->n_shared[i];j++){
2592           if (array_int[ pcis->shared[i][j] ] > 0 ){
2593             my_faces++;
2594             break;
2595           }
2596         }
2597       }
2598 
2599       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2600       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2601       my_faces=0;
2602       for (i=1;i<pcis->n_neigh;i++){
2603         for (j=0;j<pcis->n_shared[i];j++){
2604           if (array_int[ pcis->shared[i][j] ] > 0 ){
2605             my_faces_connectivity[my_faces]=pcis->neigh[i];
2606             my_faces++;
2607             break;
2608           }
2609         }
2610       }
2611       if (rank_prec_comm == master_proc) {
2612         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2613         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2614         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2615         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2616         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2617       }
2618       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2619       if (rank_prec_comm == master_proc) {
2620         faces_xadj[0]=0;
2621         faces_displacements[0]=0;
2622         j=0;
2623         for (i=1;i<size_prec_comm+1;i++) {
2624           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2625           if (number_of_faces[i-1]) {
2626             j++;
2627             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2628           }
2629         }
2630       }
2631       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2632       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2633       ierr = PetscFree(array_int);CHKERRQ(ierr);
2634       if (rank_prec_comm == master_proc) {
2635         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2636         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2637         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2638         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2639       }
2640 
2641       if ( rank_prec_comm == master_proc ) {
2642 
2643         PetscInt heuristic_for_metis=3;
2644 
2645         ncon=1;
2646         faces_nvtxs=n_subdomains;
2647         /* partition graoh induced by face connectivity */
2648         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2649         ierr = METIS_SetDefaultOptions(options);
2650         /* we need a contiguous partition of the coarse mesh */
2651         options[METIS_OPTION_CONTIG]=1;
2652         options[METIS_OPTION_NITER]=30;
2653         if (pcbddc->coarsening_ratio > 1) {
2654           if (n_subdomains>n_parts*heuristic_for_metis) {
2655             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2656             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2657             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2658             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2659           } else {
2660             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2661             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2662           }
2663         } else {
2664           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2665         }
2666         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2667         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2668         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2669 
2670         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2671         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2672         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2673         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2674       }
2675 
2676       /* Create new communicator for coarse problem splitting the old one */
2677       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2678         coarse_color=0;              /* for communicator splitting */
2679         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2680       }
2681       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2682          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2683       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2684 
2685       if ( coarse_color == 0 ) {
2686         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2687         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2688       } else {
2689         rank_coarse_comm = MPI_PROC_NULL;
2690       }
2691 
2692       /* master proc take care of arranging and distributing coarse information */
2693       if (rank_coarse_comm == master_proc) {
2694         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2695         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2696         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2697         /* some initializations */
2698         displacements_recv[0]=0;
2699         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2700         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2701         for (j=0;j<size_coarse_comm;j++) {
2702           for (i=0;i<size_prec_comm;i++) {
2703           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2704           }
2705         }
2706         /* displacements needed for scatterv of total_ranks_recv */
2707       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2708 
2709         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2710         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2711         for (j=0;j<size_coarse_comm;j++) {
2712           for (i=0;i<size_prec_comm;i++) {
2713             if (coarse_subdivision[i]==j) {
2714               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2715               total_count_recv[j]+=1;
2716             }
2717           }
2718         }
2719         /*for (j=0;j<size_coarse_comm;j++) {
2720           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2721           for (i=0;i<total_count_recv[j];i++) {
2722             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2723           }
2724           printf("\n");
2725         }*/
2726 
2727         /* identify new decomposition in terms of ranks in the old communicator */
2728         for (i=0;i<n_subdomains;i++) {
2729           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2730         }
2731         /*printf("coarse_subdivision in old end new ranks\n");
2732         for (i=0;i<size_prec_comm;i++)
2733           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2734             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2735           } else {
2736             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2737           }
2738         printf("\n");*/
2739       }
2740 
2741       /* Scatter new decomposition for send details */
2742       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2743       /* Scatter receiving details to members of coarse decomposition */
2744       if ( coarse_color == 0) {
2745         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2746         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2747         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2748       }
2749 
2750       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2751       if (coarse_color == 0) {
2752         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2753         for (i=0;i<count_recv;i++)
2754           printf("%d ",ranks_recv[i]);
2755         printf("\n");
2756       }*/
2757 
2758       if (rank_prec_comm == master_proc) {
2759         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2760         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2761         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2762         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2763       }
2764 #endif
2765       break;
2766     }
2767 
2768     case(PARALLEL_BDDC):
2769 
2770       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2771       coarse_mat_type = MATAIJ;
2772       coarse_pc_type  = PCREDUNDANT;
2773       coarse_ksp_type  = KSPPREONLY;
2774       coarse_comm = prec_comm;
2775       active_rank = rank_prec_comm;
2776       break;
2777 
2778   }
2779 
2780   switch(pcbddc->coarse_communications_type){
2781 
2782     case(SCATTERS_BDDC):
2783       {
2784         if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2785 
2786           IS coarse_IS;
2787 
2788           if(pcbddc->coarsening_ratio == 1) {
2789             ins_local_primal_size = pcbddc->local_primal_size;
2790             ins_local_primal_indices = local_primal_indices;
2791             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2792             /* nonzeros */
2793             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2794             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2795             for (i=0;i<ins_local_primal_size;i++) {
2796               dnz[i] = ins_local_primal_size;
2797             }
2798           } else {
2799             PetscMPIInt send_size;
2800             PetscMPIInt *send_buffer;
2801             PetscInt    *aux_ins_indices;
2802             PetscInt    ii,jj;
2803             MPI_Request *requests;
2804 
2805             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2806             /* reusing local_primal_displacements and replicated_primal_size */
2807             ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
2808             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&local_primal_displacements);CHKERRQ(ierr);
2809             replicated_primal_size = count_recv;
2810             j = 0;
2811             for (i=0;i<count_recv;i++) {
2812               local_primal_displacements[i] = j;
2813               j += local_primal_sizes[ranks_recv[i]];
2814             }
2815             local_primal_displacements[count_recv] = j;
2816             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&replicated_local_primal_indices);CHKERRQ(ierr);
2817             /* allocate auxiliary space */
2818             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2819             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2820             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2821             /* allocate stuffs for message massing */
2822             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2823             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2824             /* send indices to be inserted */
2825             for (i=0;i<count_recv;i++) {
2826               send_size = local_primal_sizes[ranks_recv[i]];
2827               ierr = MPI_Irecv(&replicated_local_primal_indices[local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2828             }
2829             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2830               send_size = pcbddc->local_primal_size;
2831               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2832               for (i=0;i<send_size;i++) {
2833                 send_buffer[i]=(PetscMPIInt)local_primal_indices[i];
2834               }
2835               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2836             }
2837             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2838             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2839               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2840             }
2841             j = 0;
2842             for (i=0;i<count_recv;i++) {
2843               ii = local_primal_displacements[i+1]-local_primal_displacements[i];
2844               localsizes2[i] = ii*ii;
2845               localdispl2[i] = j;
2846               j += localsizes2[i];
2847               jj = local_primal_displacements[i];
2848               /* it counts the coarse subdomains sharing the coarse node */
2849               for (k=0;k<ii;k++) {
2850                 aux_ins_indices[replicated_local_primal_indices[jj+k]] += 1;
2851               }
2852             }
2853             /* temp_coarse_mat_vals used to store matrix values to be received */
2854             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2855             /* evaluate how many values I will insert in coarse mat */
2856             ins_local_primal_size = 0;
2857             for (i=0;i<coarse_size;i++) {
2858               if (aux_ins_indices[i]) {
2859                 ins_local_primal_size++;
2860               }
2861             }
2862             /* evaluate indices I will insert in coarse mat */
2863             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2864             j = 0;
2865             for(i=0;i<coarse_size;i++) {
2866               if(aux_ins_indices[i]) {
2867                 ins_local_primal_indices[j] = i;
2868                 j++;
2869               }
2870             }
2871             /* processes partecipating in coarse problem receive matrix data from their friends */
2872             for (i=0;i<count_recv;i++) {
2873               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2874             }
2875             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2876               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2877               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2878             }
2879             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2880             /* nonzeros */
2881             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2882             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2883             /* use aux_ins_indices to realize a global to local mapping */
2884             j=0;
2885             for(i=0;i<coarse_size;i++){
2886               if(aux_ins_indices[i]==0){
2887                 aux_ins_indices[i]=-1;
2888               } else {
2889                 aux_ins_indices[i]=j;
2890                 j++;
2891               }
2892             }
2893             for (i=0;i<count_recv;i++) {
2894               j = local_primal_sizes[ranks_recv[i]];
2895               for (k=0;k<j;k++) {
2896                 dnz[aux_ins_indices[replicated_local_primal_indices[local_primal_displacements[i]+k]]] += j;
2897               }
2898             }
2899             /* check */
2900             for (i=0;i<ins_local_primal_size;i++) {
2901               if (dnz[i] > ins_local_primal_size) {
2902                 dnz[i] = ins_local_primal_size;
2903               }
2904             }
2905             ierr = PetscFree(requests);CHKERRQ(ierr);
2906             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2907             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2908           }
2909           /* create local to global mapping needed by coarse MATIS */
2910           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2911           coarse_comm = prec_comm;
2912           active_rank = rank_prec_comm;
2913           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2914           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2915           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2916         } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2917           /* arrays for values insertion */
2918           ins_local_primal_size = pcbddc->local_primal_size;
2919           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2920           ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
2921           for (j=0;j<ins_local_primal_size;j++){
2922             ins_local_primal_indices[j]=local_primal_indices[j];
2923             for (i=0;i<ins_local_primal_size;i++) {
2924               ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2925             }
2926           }
2927         }
2928         break;
2929 
2930     }
2931 
2932   }
2933   }
2934   /* Now create and fill up coarse matrix */
2935   if ( rank_prec_comm == active_rank ) {
2936 
2937     Mat matis_coarse_local_mat;
2938 
2939 
2940     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2941 #if 0
2942       ierr = MatCreate(coarse_comm,&coarse_mat);CHKERRQ(ierr);
2943       ierr = MatSetSizes(coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr);
2944       ierr = MatSetType(coarse_mat,coarse_mat_type);CHKERRQ(ierr);
2945       ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr);
2946       ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr);
2947       ierr = MatSetUp(coarse_mat);CHKERRQ(ierr);
2948       ierr = MatSetOption(coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2949       ierr = MatSetOption(coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2950 #endif
2951     } else {
2952       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&coarse_mat);CHKERRQ(ierr);
2953       ierr = MatSetUp(coarse_mat);CHKERRQ(ierr);
2954       ierr = MatISGetLocalMat(coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2955       ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr);
2956       ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr);
2957       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2958       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2959       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2960     }
2961     /* preallocation */
2962     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2963 
2964 #if 0
2965       PetscInt lrows,lcols,bs;
2966 
2967       ierr = MatGetLocalSize(coarse_mat,&lrows,&lcols);CHKERRQ(ierr);
2968       ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
2969       ierr = MatGetBlockSize(coarse_mat,&bs);CHKERRQ(ierr);
2970 
2971       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2972 
2973         Vec         vec_dnz,vec_onz;
2974         PetscScalar *my_dnz,*my_onz,*array;
2975         PetscInt    *mat_ranges,*row_ownership;
2976         PetscInt    coarse_index_row,coarse_index_col,owner;
2977 
2978         ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr);
2979         ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2980         ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr);
2981         ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr);
2982         ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2983 
2984         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr);
2985         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr);
2986         ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
2987         ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
2988 
2989         ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr);
2990         ierr = MatGetOwnershipRanges(coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2991         for (i=0;i<size_prec_comm;i++) {
2992           for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2993             row_ownership[j]=i;
2994           }
2995         }
2996 
2997         for (i=0;i<pcbddc->local_primal_size;i++) {
2998           coarse_index_row = local_primal_indices[i];
2999           owner = row_ownership[coarse_index_row];
3000           for (j=i;j<pcbddc->local_primal_size;j++) {
3001             owner = row_ownership[coarse_index_row];
3002             coarse_index_col = local_primal_indices[j];
3003             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) {
3004               my_dnz[i] += 1.0;
3005             } else {
3006               my_onz[i] += 1.0;
3007             }
3008             if (i != j) {
3009               owner = row_ownership[coarse_index_col];
3010               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) {
3011                 my_dnz[j] += 1.0;
3012               } else {
3013                 my_onz[j] += 1.0;
3014               }
3015             }
3016           }
3017         }
3018         ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
3019         ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
3020         if (pcbddc->local_primal_size) {
3021           ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
3022           ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
3023         }
3024         ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
3025         ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
3026         ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
3027         ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
3028         j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
3029         ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
3030         for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
3031 
3032         ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
3033         ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
3034         for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
3035 
3036         ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
3037         ierr = PetscFree(my_dnz);CHKERRQ(ierr);
3038         ierr = PetscFree(my_onz);CHKERRQ(ierr);
3039         ierr = PetscFree(row_ownership);CHKERRQ(ierr);
3040         ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
3041         ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
3042       }
3043 
3044       /* check */
3045       for (i=0;i<lrows;i++) {
3046         if (dnz[i]>lcols) dnz[i]=lcols;
3047         if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols;
3048       }
3049       ierr = MatSeqAIJSetPreallocation(coarse_mat,0,dnz);CHKERRQ(ierr);
3050       ierr = MatMPIAIJSetPreallocation(coarse_mat,0,dnz,0,onz);CHKERRQ(ierr);
3051       ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3052 #endif
3053     } else {
3054       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
3055       ierr = PetscFree(dnz);CHKERRQ(ierr);
3056     }
3057 
3058     /* insert values */
3059     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
3060 #if 0
3061       ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3062 #endif
3063     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3064       if (pcbddc->coarsening_ratio == 1) {
3065         ins_coarse_mat_vals = coarse_submat_vals;
3066         ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
3067       } else {
3068         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3069         for (k=0;k<replicated_primal_size;k++) {
3070           offset = local_primal_displacements[k];
3071           offset2 = localdispl2[k];
3072           ins_local_primal_size = local_primal_displacements[k+1]-local_primal_displacements[k];
3073           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3074           for (j=0;j<ins_local_primal_size;j++){
3075             ins_local_primal_indices[j]=(PetscInt)replicated_local_primal_indices[offset+j];
3076           }
3077           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3078           ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3079           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3080         }
3081       }
3082       ins_local_primal_indices = 0;
3083       ins_coarse_mat_vals = 0;
3084     }
3085     ierr = MatAssemblyBegin(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3086     ierr = MatAssemblyEnd(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3087     /* symmetry of coarse matrix */
3088     if (issym) {
3089       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3090     }
3091     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3092   }
3093 
3094   /* create loc to glob scatters if needed */
3095   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
3096      IS local_IS,global_IS;
3097      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
3098      ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
3099      ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3100      ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
3101      ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
3102   }
3103 
3104   /* free memory no longer needed */
3105   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
3106   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
3107   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
3108   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
3109   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
3110   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
3111 
3112   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3113   ierr = PetscFree(local_primal_sizes);CHKERRQ(ierr);
3114   ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
3115   ierr = PetscFree(replicated_local_primal_indices);CHKERRQ(ierr);
3116 
3117   /* Compute coarse null space */
3118   CoarseNullSpace = 0;
3119   if (pcbddc->NullSpace) {
3120     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3121   }
3122 
3123   /* KSP for coarse problem */
3124   if (rank_prec_comm == active_rank) {
3125     PetscBool isbddc=PETSC_FALSE;
3126 
3127     ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
3128     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3129     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3130     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
3131     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3132     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3133     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3134     /* Allow user's customization */
3135     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
3136     /* Set Up PC for coarse problem BDDC */
3137     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3138       i = pcbddc->current_level+1;
3139       ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr);
3140       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3141       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3142       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
3143       if (CoarseNullSpace) {
3144         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3145       }
3146       if (dbg_flag) {
3147         ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr);
3148         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3149       }
3150     } else {
3151       if (CoarseNullSpace) {
3152         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3153       }
3154     }
3155     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3156     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3157 
3158     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr);
3159     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3160     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3161     if (j == 1) {
3162       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3163       if (isbddc) {
3164         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
3165       }
3166     }
3167   }
3168   /* Check coarse problem if requested */
3169   if ( dbg_flag && rank_prec_comm == active_rank ) {
3170     KSP check_ksp;
3171     PC  check_pc;
3172     Vec check_vec;
3173     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
3174     KSPType check_ksp_type;
3175 
3176     /* Create ksp object suitable for extreme eigenvalues' estimation */
3177     ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr);
3178     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3179     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3180     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3181       if (issym) check_ksp_type = KSPCG;
3182       else check_ksp_type = KSPGMRES;
3183       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3184     } else {
3185       check_ksp_type = KSPPREONLY;
3186     }
3187     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3188     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3189     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3190     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3191     /* create random vec */
3192     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3193     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3194     if (CoarseNullSpace) {
3195       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3196     }
3197     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3198     /* solve coarse problem */
3199     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3200     if (CoarseNullSpace) {
3201       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3202     }
3203     /* check coarse problem residual error */
3204     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3205     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3206     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3207     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3208     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3209     /* get eigenvalue estimation if inexact */
3210     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3211       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3212       ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
3213       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr);
3214       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3215     }
3216     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3217     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3218     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3219   }
3220   if (dbg_flag) {
3221     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3222   }
3223   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3224   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 #undef __FUNCT__
3229 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3230 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3231 {
3232   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3233   PC_IS*         pcis = (PC_IS*)pc->data;
3234   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3235   PetscInt       i,j,coarse_size;
3236   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3237   PetscErrorCode ierr;
3238 
3239   PetscFunctionBegin;
3240   /* get indices in local ordering for vertices and constraints */
3241   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3242   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3243   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3244   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3245   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3246   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3247   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3248 
3249   /* Compute global number of coarse dofs */
3250   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3251 
3252   /* check numbering */
3253   if (pcbddc->dbg_flag) {
3254     PetscScalar coarsesum,*array;
3255 
3256     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3257     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3258     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3259     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3260     for (i=0;i<pcbddc->local_primal_size;i++) {
3261       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3262     }
3263     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3264     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3265     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3266     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3267     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3268     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3269     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3270     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3271     for (i=0;i<pcis->n;i++) {
3272       if (array[i] == 1.0) {
3273         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3274       }
3275     }
3276     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3277     for (i=0;i<pcis->n;i++) {
3278       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3279     }
3280     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3281     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3282     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3283     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3284     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3285     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3286     if (pcbddc->dbg_flag > 1) {
3287       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3288       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3289       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3290       for (i=0;i<pcbddc->local_primal_size;i++) {
3291         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3292       }
3293       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3294     }
3295     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3296   }
3297   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3298   /* get back data */
3299   *coarse_size_n = coarse_size;
3300   *local_primal_indices_n = local_primal_indices;
3301   PetscFunctionReturn(0);
3302 }
3303 
3304