xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision e7931f94dcf2f980579dfb2581f1d8990cb40df3)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCSetLevel"
47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
48 {
49   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   pcbddc->current_level=level;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCBDDCResetCustomization"
58 PetscErrorCode PCBDDCResetCustomization(PC pc)
59 {
60   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
61   PetscInt       i;
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
66   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
67   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
68   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
70   for (i=0;i<pcbddc->n_ISForDofs;i++) {
71     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "PCBDDCResetTopography"
79 PetscErrorCode PCBDDCResetTopography(PC pc)
80 {
81   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
86   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
87   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCBDDCResetSolvers"
93 PetscErrorCode PCBDDCResetSolvers(PC pc)
94 {
95   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
101   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
103   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
104   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
105   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
106   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
107   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
110   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
113   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
114   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
115   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
116   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "PCBDDCCreateWorkVectors"
122 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
123 {
124   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
125   PC_IS          *pcis = (PC_IS*)pc->data;
126   VecType        impVecType;
127   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
128   PetscErrorCode ierr;
129 
130   PetscFunctionBegin;
131   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
132   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
133   local_primal_size = n_constraints+n_vertices;
134   n_R = pcis->n-n_vertices;
135   /* local work vectors */
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
138   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
139   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
140   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
141   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
142   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
143   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
144   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
145   if (n_constraints) {
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
155 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
156 {
157   PetscErrorCode         ierr;
158   /* pointers to pcis and pcbddc */
159   PC_IS*                 pcis = (PC_IS*)pc->data;
160   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
161   /* submatrices of local problem */
162   Mat                    A_RV,A_VR,A_VV;
163   /* working matrices */
164   Mat                    M1,M2,M3,C_CR;
165   /* working vectors */
166   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
167   /* additional working stuff */
168   IS                     is_aux;
169   ISLocalToGlobalMapping BtoNmap;
170   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
171   const PetscScalar      *array,*row_cmat_values;
172   const PetscInt         *row_cmat_indices,*idx_R_local;
173   PetscInt               *vertices,*idx_V_B,*auxindices;
174   PetscInt               n_vertices,n_constraints,size_of_constraint;
175   PetscInt               i,j,n_R,n_D,n_B;
176   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
177   /* Vector and matrix types */
178   VecType                impVecType;
179   MatType                impMatType;
180   /* some shortcuts to scalars */
181   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
182   /* for debugging purposes */
183   PetscReal              *coarsefunctions_errors,*constraints_errors;
184 
185   PetscFunctionBegin;
186   /* get number of vertices and their local indices */
187   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
188   n_constraints = pcbddc->local_primal_size-n_vertices;
189   /* Set Non-overlapping dimensions */
190   n_B = pcis->n_B; n_D = pcis->n - n_B;
191   n_R = pcis->n-n_vertices;
192 
193   /* Set types for local objects needed by BDDC precondtioner */
194   impMatType = MATSEQDENSE;
195   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
196 
197   /* Allocating some extra storage just to be safe */
198   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
199   for (i=0;i<pcis->n;i++) auxindices[i]=i;
200 
201   /* vertices in boundary numbering */
202   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
203   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
204   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
205   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
206   if (i != n_vertices) {
207     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
208   }
209 
210   /* some work vectors on vertices and/or constraints */
211   if (n_vertices) {
212     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
213     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
214     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
215     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
216   }
217   if (n_constraints) {
218     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
219     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
220   }
221 
222   /* Precompute stuffs needed for preprocessing and application of BDDC*/
223   if (n_constraints) {
224     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
225     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
226     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
227     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
228 
229     /* Extract constraints on R nodes: C_{CR}  */
230     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
231     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
232     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
233 
234     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
235     for (i=0;i<n_constraints;i++) {
236       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
237       /* Get row of constraint matrix in R numbering */
238       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
239       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
240       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
241       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
242       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
243       /* Solve for row of constraint matrix in R numbering */
244       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
245       /* Set values in local_auxmat2 */
246       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
247       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
248       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
249     }
250     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
252     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
253 
254     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
255     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
256     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
257     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
258     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
259     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
260     ierr = MatSetUp(M1);CHKERRQ(ierr);
261     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
262     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
263     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
264     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
265     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
266     ierr = MatDestroy(&M2);CHKERRQ(ierr);
267     ierr = MatDestroy(&M3);CHKERRQ(ierr);
268     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
269     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
270   }
271 
272   /* Get submatrices from subdomain matrix */
273   if (n_vertices) {
274     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
275     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
277     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
278     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
279   }
280 
281   /* Matrix of coarse basis functions (local) */
282   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
283   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
284   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
285   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
286   if (pcbddc->switch_static || pcbddc->dbg_flag) {
287     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
288     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
289     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
290     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
291   }
292 
293   if (pcbddc->dbg_flag) {
294     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
295     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
296     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
297   }
298   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
299   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
300 
301   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
302 
303   /* vertices */
304   for (i=0;i<n_vertices;i++) {
305     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
306     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
307     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
308     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
309     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
310     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
311     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
312     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
313     if (n_constraints) {
314       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
315       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
316       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
317     }
318     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
319     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
320 
321     /* Set values in coarse basis function and subdomain part of coarse_mat */
322     /* coarse basis functions */
323     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
324     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
325     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
326     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
327     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
329     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
330     if (pcbddc->switch_static || pcbddc->dbg_flag) {
331       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
333       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
334       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
335       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
336     }
337     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
338     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
339     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
340     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
341     if (n_constraints) {
342       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
343       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
345     }
346 
347     /* check */
348     if (pcbddc->dbg_flag) {
349       /* assemble subdomain vector on local nodes */
350       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
351       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
352       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
354       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
355       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
357       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
358       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
359       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
360       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
361       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
362       if (n_constraints) {
363         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
364         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
365         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
366       }
367       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
368       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
369       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
370       /* check saddle point solution */
371       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
372       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
373       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
374       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
375       /* shift by the identity matrix */
376       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
377       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
380     }
381   }
382 
383   /* constraints */
384   for (i=0;i<n_constraints;i++) {
385     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
386     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
387     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
388     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
389     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
390     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
391     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
392     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
393     if (n_vertices) {
394       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
395     }
396     /* Set values in coarse basis function and subdomain part of coarse_mat */
397     /* coarse basis functions */
398     j = i+n_vertices; /* don't touch this! */
399     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
400     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
404     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
405     if (pcbddc->switch_static || pcbddc->dbg_flag) {
406       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
409       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
410       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
411     }
412     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
413     if (n_vertices) {
414       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
417     }
418     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
420     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
421 
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
430       /* assemble subdomain vector of lagrange multipliers */
431       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
432       if (n_vertices) {
433         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       }
437       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
438       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
439       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
440       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
443       /* check saddle point solution */
444       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
447       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
448       /* shift by the identity matrix */
449       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
450       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
453     }
454   }
455   /* call assembling routines for local coarse basis */
456   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   if (pcbddc->switch_static || pcbddc->dbg_flag) {
459     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
461   }
462 
463   /* compute other basis functions for non-symmetric problems */
464   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
465   if (!setsym || (setsym && !issym)) {
466     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
467     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
468     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
469     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
470     if (pcbddc->switch_static || pcbddc->dbg_flag) {
471       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
472       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
473       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
474       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
475     }
476     for (i=0;i<pcbddc->local_primal_size;i++) {
477       if (n_constraints) {
478         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
479         for (j=0;j<n_constraints;j++) {
480           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
481         }
482         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
483         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
484       }
485       if (i<n_vertices) {
486         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
487         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
488         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
489         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
490         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
491         if (n_constraints) {
492           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
493         }
494       } else {
495         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
496       }
497       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
498       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
499       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
500       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
502       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
503       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
504       if (i<n_vertices) {
505         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
506       }
507       if (pcbddc->switch_static || pcbddc->dbg_flag) {
508         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
511         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
513       }
514 
515       if (pcbddc->dbg_flag) {
516         /* assemble subdomain vector on nodes */
517         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
518         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
519         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
520         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
521         if (i<n_vertices) {
522           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
523         }
524         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
525         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
526         /* assemble subdomain vector of lagrange multipliers */
527         for (j=0;j<pcbddc->local_primal_size;j++) {
528           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
529         }
530         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
531         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
532         /* check saddle point solution */
533         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
534         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
535         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
536         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
537         /* shift by the identity matrix */
538         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
542       }
543     }
544     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     if (pcbddc->switch_static || pcbddc->dbg_flag) {
547       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549     }
550   }
551   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
552   /* Checking coarse_sub_mat and coarse basis functios */
553   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
554   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
555   if (pcbddc->dbg_flag) {
556     Mat         coarse_sub_mat;
557     Mat         AUXMAT,TM1,TM2,TM3,TM4;
558     Mat         coarse_phi_D,coarse_phi_B;
559     Mat         coarse_psi_D,coarse_psi_B;
560     Mat         A_II,A_BB,A_IB,A_BI;
561     MatType     checkmattype=MATSEQAIJ;
562     PetscReal   real_value;
563 
564     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
565     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
566     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
567     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
568     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
569     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
570     if (pcbddc->coarse_psi_B) {
571       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
572       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
573     }
574     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
575 
576     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
577     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
578     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
579     if (pcbddc->coarse_psi_B) {
580       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
581       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
583       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592     } else {
593       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
594       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
595       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601     }
602     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
604     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
605     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
606     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
607     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
608     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
612     for (i=0;i<pcbddc->local_primal_size;i++) {
613       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
614     }
615     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
616     for (i=0;i<pcbddc->local_primal_size;i++) {
617       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
618     }
619     if (pcbddc->coarse_psi_B) {
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
621       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
622         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
623       }
624       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
625       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
626         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
627       }
628     }
629     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
630     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
631     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
632     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
633     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
634     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
635     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
636     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
637     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
638     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
639     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
640     if (pcbddc->coarse_psi_B) {
641       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
642       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
643     }
644     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
645     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
646     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
647     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
648   }
649   /* free memory */
650   if (n_vertices) {
651     ierr = PetscFree(vertices);CHKERRQ(ierr);
652     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
653     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
654     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
655     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
656     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
657   }
658   if (n_constraints) {
659     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
660     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
661     ierr = MatDestroy(&M1);CHKERRQ(ierr);
662     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
663   }
664   ierr = PetscFree(auxindices);CHKERRQ(ierr);
665   /* get back data */
666   *coarse_submat_vals_n = coarse_submat_vals;
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
672 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
673 {
674   PC_IS*            pcis = (PC_IS*)(pc->data);
675   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
676   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
677   /* manage repeated solves */
678   MatReuse          reuse;
679   MatStructure      matstruct;
680   PetscErrorCode    ierr;
681 
682   PetscFunctionBegin;
683   /* get mat flags */
684   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
685   reuse = MAT_INITIAL_MATRIX;
686   if (pc->setupcalled) {
687     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
688     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
689     if (matstruct == SAME_NONZERO_PATTERN) {
690       reuse = MAT_REUSE_MATRIX;
691     } else {
692       reuse = MAT_INITIAL_MATRIX;
693     }
694   }
695   if (reuse == MAT_INITIAL_MATRIX) {
696     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
698     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
699     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
700     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
701   }
702 
703   /* transform local matrices if needed */
704   if (pcbddc->use_change_of_basis) {
705     Mat         change_mat_all;
706     PetscScalar *row_cmat_values;
707     PetscInt    *row_cmat_indices;
708     PetscInt    *nnz,*is_indices,*temp_indices;
709     PetscInt    i,j,k,n_D,n_B;
710 
711     /* Get Non-overlapping dimensions */
712     n_B = pcis->n_B;
713     n_D = pcis->n-n_B;
714 
715     /* compute nonzero structure of change of basis on all local nodes */
716     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
717     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
718     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
719     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
720     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
721     k=1;
722     for (i=0;i<n_B;i++) {
723       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
724       nnz[is_indices[i]]=j;
725       if (k < j) k = j;
726       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
727     }
728     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     /* assemble change of basis matrix on the whole set of local dofs */
730     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
731     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
732     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
733     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
734     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
735     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
736     for (i=0;i<n_D;i++) {
737       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
738     }
739     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
743       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
744       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
746     }
747     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
748     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
749     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
750     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
751     if (i==1) {
752       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
753     } else {
754       Mat work_mat;
755       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
756       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
757       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
758     }
759     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
760     ierr = PetscFree(nnz);CHKERRQ(ierr);
761     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
762   } else {
763     /* without change of basis, the local matrix is unchanged */
764     if (!pcbddc->local_mat) {
765       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
766       pcbddc->local_mat = matis->A;
767     }
768   }
769 
770   /* get submatrices */
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
772   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
773   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
774   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
780 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
781 {
782   PC_IS*         pcis = (PC_IS*)(pc->data);
783   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
784   IS             is_aux1,is_aux2;
785   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
786   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
787   PetscBT        bitmask;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* Set Non-overlapping dimensions */
792   n_B = pcis->n_B; n_D = pcis->n - n_B;
793   /* get vertex indices from constraint matrix */
794   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
795   /* Set number of constraints */
796   n_constraints = pcbddc->local_primal_size-n_vertices;
797   /* create auxiliary bitmask */
798   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
799   for (i=0;i<n_vertices;i++) {
800     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
801   }
802   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
803   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
804   for (i=0, n_R=0; i<pcis->n; i++) {
805     if (!PetscBTLookup(bitmask,i)) {
806       idx_R_local[n_R] = i;
807       n_R++;
808     }
809   }
810   ierr = PetscFree(vertices);CHKERRQ(ierr);
811   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
812 
813   /* print some info if requested */
814   if (pcbddc->dbg_flag) {
815     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
816     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
817     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
818     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
820     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
821     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
822   }
823 
824   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
825   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
826   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
827   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
828   for (i=0; i<n_D; i++) {
829     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
830   }
831   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
832   for (i=0, j=0; i<n_R; i++) {
833     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
834       aux_array1[j++] = i;
835     }
836   }
837   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
838   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
839   for (i=0, j=0; i<n_B; i++) {
840     if (!PetscBTLookup(bitmask,is_indices[i])) {
841       aux_array2[j++] = i;
842     }
843   }
844   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
845   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
846   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
847   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
849 
850   if (pcbddc->switch_static || pcbddc->dbg_flag) {
851     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
852     for (i=0, j=0; i<n_R; i++) {
853       if (PetscBTLookup(bitmask,idx_R_local[i])) {
854         aux_array1[j++] = i;
855       }
856     }
857     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
858     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
859     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
860   }
861   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
867 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
868 {
869   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
870 
871   PetscFunctionBegin;
872   pcbddc->use_exact_dirichlet=use;
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
878 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
879 {
880   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
881   PC_IS          *pcis = (PC_IS*)pc->data;
882   PC             pc_temp;
883   Mat            A_RR;
884   Vec            vec1,vec2,vec3;
885   MatStructure   matstruct;
886   PetscScalar    m_one = -1.0;
887   PetscReal      value;
888   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
889   PetscErrorCode ierr;
890 
891   PetscFunctionBegin;
892   /* Creating PC contexts for local Dirichlet and Neumann problems */
893   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
894 
895   /* DIRICHLET PROBLEM */
896   /* Matrix for Dirichlet problem is pcis->A_II */
897   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
898   if (!pcbddc->ksp_D) { /* create object if not yet build */
899     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
900     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
901     /* default */
902     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
903     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
904     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
905     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
906     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
907   }
908   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
909   /* Allow user's customization */
910   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
911   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
912   if (!n_D) {
913     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
914     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
915   }
916   /* Set Up KSP for Dirichlet problem of BDDC */
917   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
918   /* set ksp_D into pcis data */
919   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
920   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
921   pcis->ksp_D = pcbddc->ksp_D;
922 
923   /* NEUMANN PROBLEM */
924   /* Matrix for Neumann problem is A_RR -> we need to create it */
925   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
926   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
927   if (!pcbddc->ksp_R) { /* create object if not yet build */
928     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
929     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
930     /* default */
931     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
932     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
933     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
934     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
935     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
936   }
937   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
938   /* Allow user's customization */
939   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
940   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
941   if (!n_R) {
942     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
943     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
944   }
945   /* Set Up KSP for Neumann problem of BDDC */
946   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
947 
948   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
949 
950   /* Dirichlet */
951   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
952   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
953   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
954   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
955   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
956   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
957   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
958   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
959   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
960   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
961   /* need to be adapted? */
962   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
963   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
964   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
965   /* print info */
966   if (pcbddc->dbg_flag) {
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
969     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
970     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
971     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
972   }
973   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
974     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
975   }
976 
977   /* Neumann */
978   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
979   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
980   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
981   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
982   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
983   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
984   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
985   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
986   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
987   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
988   /* need to be adapted? */
989   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
990   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
991   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
992   /* print info */
993   if (pcbddc->dbg_flag) {
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
995     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
996   }
997   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
998     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
999   }
1000 
1001   /* free Neumann problem's matrix */
1002   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1008 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1009 {
1010   PetscErrorCode ierr;
1011   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1012 
1013   PetscFunctionBegin;
1014   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1015   if (pcbddc->local_auxmat1) {
1016     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1017     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1018   }
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1024 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1025 {
1026   PetscErrorCode ierr;
1027   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1028   PC_IS*            pcis = (PC_IS*)  (pc->data);
1029   const PetscScalar zero = 0.0;
1030 
1031   PetscFunctionBegin;
1032   /* Application of PHI^T (or PSI^T)  */
1033   if (pcbddc->coarse_psi_B) {
1034     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1035     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1036   } else {
1037     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1038     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1039   }
1040   /* Scatter data of coarse_rhs */
1041   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1042   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043 
1044   /* Local solution on R nodes */
1045   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1046   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1048   if (pcbddc->switch_static) {
1049     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   }
1052   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1053   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1054   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056   if (pcbddc->switch_static) {
1057     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1059   }
1060 
1061   /* Coarse solution */
1062   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1064     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1065   }
1066   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068 
1069   /* Sum contributions from two levels */
1070   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1071   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1077 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1078 {
1079   PetscErrorCode ierr;
1080   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1081 
1082   PetscFunctionBegin;
1083   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1089 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1090 {
1091   PetscErrorCode ierr;
1092   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1093 
1094   PetscFunctionBegin;
1095   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /* uncomment for testing purposes */
1100 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1101 #undef __FUNCT__
1102 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1103 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1104 {
1105   PetscErrorCode    ierr;
1106   PC_IS*            pcis = (PC_IS*)(pc->data);
1107   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1108   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1109   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1110   MatType           impMatType=MATSEQAIJ;
1111   /* one and zero */
1112   PetscScalar       one=1.0,zero=0.0;
1113   /* space to store constraints and their local indices */
1114   PetscScalar       *temp_quadrature_constraint;
1115   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1116   /* iterators */
1117   PetscInt          i,j,k,total_counts,temp_start_ptr;
1118   /* stuff to store connected components stored in pcbddc->mat_graph */
1119   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1120   PetscInt          n_ISForFaces,n_ISForEdges;
1121   /* near null space stuff */
1122   MatNullSpace      nearnullsp;
1123   const Vec         *nearnullvecs;
1124   Vec               *localnearnullsp;
1125   PetscBool         nnsp_has_cnst;
1126   PetscInt          nnsp_size;
1127   PetscScalar       *array;
1128   /* BLAS integers */
1129   PetscBLASInt      lwork,lierr;
1130   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1131   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1132   /* LAPACK working arrays for SVD or POD */
1133   PetscBool         skip_lapack;
1134   PetscScalar       *work;
1135   PetscReal         *singular_vals;
1136 #if defined(PETSC_USE_COMPLEX)
1137   PetscReal         *rwork;
1138 #endif
1139 #if defined(PETSC_MISSING_LAPACK_GESVD)
1140   PetscBLASInt      Blas_one_2=1;
1141   PetscScalar       *temp_basis,*correlation_mat;
1142 #else
1143   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1144   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1145 #endif
1146   /* change of basis */
1147   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1148   PetscBool         boolforchange;
1149   PetscBT           touched,change_basis;
1150   /* auxiliary stuff */
1151   PetscInt          *nnz,*is_indices,*local_to_B;
1152   /* some quantities */
1153   PetscInt          n_vertices,total_primal_vertices;
1154   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1155 
1156 
1157   PetscFunctionBegin;
1158   /* Get index sets for faces, edges and vertices from graph */
1159   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1160     pcbddc->use_vertices = PETSC_TRUE;
1161   }
1162   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1163   /* print some info */
1164   if (pcbddc->dbg_flag) {
1165     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1166     i = 0;
1167     if (ISForVertices) {
1168       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1169     }
1170     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1171     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1172     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1173     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1174   }
1175   /* check if near null space is attached to global mat */
1176   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1177   if (nearnullsp) {
1178     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1179   } else { /* if near null space is not provided BDDC uses constants by default */
1180     nnsp_size = 0;
1181     nnsp_has_cnst = PETSC_TRUE;
1182   }
1183   /* get max number of constraints on a single cc */
1184   max_constraints = nnsp_size;
1185   if (nnsp_has_cnst) max_constraints++;
1186 
1187   /*
1188        Evaluate maximum storage size needed by the procedure
1189        - temp_indices will contain start index of each constraint stored as follows
1190        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1191        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1192        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1193                                                                                                                                                          */
1194   total_counts = n_ISForFaces+n_ISForEdges;
1195   total_counts *= max_constraints;
1196   n_vertices = 0;
1197   if (ISForVertices) {
1198     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1199   }
1200   total_counts += n_vertices;
1201   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1202   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1203   total_counts = 0;
1204   max_size_of_constraint = 0;
1205   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1206     if (i<n_ISForEdges) {
1207       used_IS = &ISForEdges[i];
1208     } else {
1209       used_IS = &ISForFaces[i-n_ISForEdges];
1210     }
1211     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1212     total_counts += j;
1213     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1214   }
1215   total_counts *= max_constraints;
1216   total_counts += n_vertices;
1217   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1218   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1219   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1220   /* local to boundary numbering */
1221   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1222   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1223   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1224   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1225   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1226   /* get local part of global near null space vectors */
1227   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1228   for (k=0;k<nnsp_size;k++) {
1229     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1230     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   }
1233 
1234   /* whether or not to skip lapack calls */
1235   skip_lapack = PETSC_TRUE;
1236   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1237 
1238   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1239   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1240     PetscScalar temp_work;
1241 #if defined(PETSC_MISSING_LAPACK_GESVD)
1242     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1243     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1244     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1245     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1246 #if defined(PETSC_USE_COMPLEX)
1247     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1248 #endif
1249     /* now we evaluate the optimal workspace using query with lwork=-1 */
1250     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1251     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1252     lwork = -1;
1253     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1254 #if !defined(PETSC_USE_COMPLEX)
1255     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1256 #else
1257     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1258 #endif
1259     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1260     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1261 #else /* on missing GESVD */
1262     /* SVD */
1263     PetscInt max_n,min_n;
1264     max_n = max_size_of_constraint;
1265     min_n = max_constraints;
1266     if (max_size_of_constraint < max_constraints) {
1267       min_n = max_size_of_constraint;
1268       max_n = max_constraints;
1269     }
1270     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1271 #if defined(PETSC_USE_COMPLEX)
1272     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1273 #endif
1274     /* now we evaluate the optimal workspace using query with lwork=-1 */
1275     lwork = -1;
1276     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1277     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1278     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1279     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1280 #if !defined(PETSC_USE_COMPLEX)
1281     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1282 #else
1283     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1284 #endif
1285     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1286     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1287 #endif /* on missing GESVD */
1288     /* Allocate optimal workspace */
1289     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1290     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1291   }
1292   /* Now we can loop on constraining sets */
1293   total_counts = 0;
1294   temp_indices[0] = 0;
1295   /* vertices */
1296   if (ISForVertices) {
1297     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298     if (nnsp_has_cnst) { /* consider all vertices */
1299       for (i=0;i<n_vertices;i++) {
1300         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1301         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1302         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1303         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1304         total_counts++;
1305       }
1306     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1307       PetscBool used_vertex;
1308       for (i=0;i<n_vertices;i++) {
1309         used_vertex = PETSC_FALSE;
1310         k = 0;
1311         while (!used_vertex && k<nnsp_size) {
1312           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1313           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1314             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1315             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1316             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1317             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1318             total_counts++;
1319             used_vertex = PETSC_TRUE;
1320           }
1321           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1322           k++;
1323         }
1324       }
1325     }
1326     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1327     n_vertices = total_counts;
1328   }
1329 
1330   /* edges and faces */
1331   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1332     if (i<n_ISForEdges) {
1333       used_IS = &ISForEdges[i];
1334       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1335     } else {
1336       used_IS = &ISForFaces[i-n_ISForEdges];
1337       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1338     }
1339     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1340     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1341     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1342     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1343     /* change of basis should not be performed on local periodic nodes */
1344     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1345     if (nnsp_has_cnst) {
1346       PetscScalar quad_value;
1347       temp_constraints++;
1348       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1349       for (j=0;j<size_of_constraint;j++) {
1350         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1351         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1352         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1353       }
1354       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1355       if (boolforchange) {
1356         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1357       }
1358       total_counts++;
1359     }
1360     for (k=0;k<nnsp_size;k++) {
1361       PetscReal real_value;
1362       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1363       for (j=0;j<size_of_constraint;j++) {
1364         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1365         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1366         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1367       }
1368       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1369       /* check if array is null on the connected component */
1370       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1371       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1372       if (real_value > 0.0) { /* keep indices and values */
1373         temp_constraints++;
1374         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1375         if (boolforchange) {
1376           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1377         }
1378         total_counts++;
1379       }
1380     }
1381     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1382     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1383     if (!pcbddc->use_nnsp_true) {
1384       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1385 
1386 #if defined(PETSC_MISSING_LAPACK_GESVD)
1387       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1388          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1389          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1390             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1391             from that computed using LAPACKgesvd
1392          -> This is due to a different computation of eigenvectors in LAPACKheev
1393          -> The quality of the POD-computed basis will be the same */
1394       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1395       /* Store upper triangular part of correlation matrix */
1396       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1397       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1398       for (j=0;j<temp_constraints;j++) {
1399         for (k=0;k<j+1;k++) {
1400           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1401         }
1402       }
1403       /* compute eigenvalues and eigenvectors of correlation matrix */
1404       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1405       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1406 #if !defined(PETSC_USE_COMPLEX)
1407       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1408 #else
1409       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1410 #endif
1411       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1412       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1413       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1414       j=0;
1415       while (j < temp_constraints && singular_vals[j] < tol) j++;
1416       total_counts=total_counts-j;
1417       /* scale and copy POD basis into used quadrature memory */
1418       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1419       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1420       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1421       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1422       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1423       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1424       if (j<temp_constraints) {
1425         PetscInt ii;
1426         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1427         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1428         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1429         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1430         for (k=0;k<temp_constraints-j;k++) {
1431           for (ii=0;ii<size_of_constraint;ii++) {
1432             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1433           }
1434         }
1435       }
1436 #else  /* on missing GESVD */
1437       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1438       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1439       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1440       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1441 #if !defined(PETSC_USE_COMPLEX)
1442       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1443 #else
1444       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1445 #endif
1446       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1447       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1448       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1449       k = temp_constraints;
1450       if (k > size_of_constraint) k = size_of_constraint;
1451       j = 0;
1452       while (j < k && singular_vals[k-j-1] < tol) j++;
1453       total_counts = total_counts-temp_constraints+k-j;
1454 #endif /* on missing GESVD */
1455     }
1456   }
1457   /* free index sets of faces, edges and vertices */
1458   for (i=0;i<n_ISForFaces;i++) {
1459     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1460   }
1461   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1462   for (i=0;i<n_ISForEdges;i++) {
1463     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1464   }
1465   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1466   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1467 
1468   /* free workspace */
1469   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1470     ierr = PetscFree(work);CHKERRQ(ierr);
1471 #if defined(PETSC_USE_COMPLEX)
1472     ierr = PetscFree(rwork);CHKERRQ(ierr);
1473 #endif
1474     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1475 #if defined(PETSC_MISSING_LAPACK_GESVD)
1476     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1477     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1478 #endif
1479   }
1480   for (k=0;k<nnsp_size;k++) {
1481     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1482   }
1483   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1484 
1485   /* set quantities in pcbddc data structure */
1486   /* n_vertices defines the number of subdomain corners in the primal space */
1487   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1488   pcbddc->local_primal_size = total_counts;
1489   pcbddc->n_vertices = n_vertices;
1490   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1491 
1492   /* Create constraint matrix */
1493   /* The constraint matrix is used to compute the l2g map of primal dofs */
1494   /* so we need to set it up properly either with or without change of basis */
1495   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1496   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1497   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1498   /* array to compute a local numbering of constraints : vertices first then constraints */
1499   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1500   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1501   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1502   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1503   /* auxiliary stuff for basis change */
1504   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1505   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1506 
1507   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1508   total_primal_vertices=0;
1509   for (i=0;i<pcbddc->local_primal_size;i++) {
1510     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1511     if (size_of_constraint == 1) {
1512       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1513       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1514       aux_primal_minloc[total_primal_vertices]=0;
1515       total_primal_vertices++;
1516     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1517       PetscInt min_loc,min_index;
1518       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1519       /* find first untouched local node */
1520       k = 0;
1521       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1522       min_index = global_indices[k];
1523       min_loc = k;
1524       /* search the minimum among global nodes already untouched on the cc */
1525       for (k=1;k<size_of_constraint;k++) {
1526         /* there can be more than one constraint on a single connected component */
1527         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1528           min_index = global_indices[k];
1529           min_loc = k;
1530         }
1531       }
1532       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1533       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1534       aux_primal_minloc[total_primal_vertices]=min_loc;
1535       total_primal_vertices++;
1536     }
1537   }
1538   /* free workspace */
1539   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1540   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1541   /* permute indices in order to have a sorted set of vertices */
1542   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1543 
1544   /* nonzero structure of constraint matrix */
1545   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1546   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1547   j=total_primal_vertices;
1548   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1549     if (!PetscBTLookup(change_basis,i)) {
1550       nnz[j]=temp_indices[i+1]-temp_indices[i];
1551       j++;
1552     }
1553   }
1554   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1555   ierr = PetscFree(nnz);CHKERRQ(ierr);
1556   /* set values in constraint matrix */
1557   for (i=0;i<total_primal_vertices;i++) {
1558     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1559   }
1560   total_counts = total_primal_vertices;
1561   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1562     if (!PetscBTLookup(change_basis,i)) {
1563       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1564       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1565       total_counts++;
1566     }
1567   }
1568   /* assembling */
1569   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1570   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1571   /*
1572   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1573   */
1574   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1575   if (pcbddc->use_change_of_basis) {
1576     PetscBool qr_needed = PETSC_FALSE;
1577     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1578     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1579     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1580     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1581     /* work arrays */
1582     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1583     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1584     /* nonzeros per row */
1585     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1586       if (PetscBTLookup(change_basis,i)) {
1587         qr_needed = PETSC_TRUE;
1588         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1589         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1590       }
1591     }
1592     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1593     ierr = PetscFree(nnz);CHKERRQ(ierr);
1594     /* Set initial identity in the matrix */
1595     for (i=0;i<pcis->n_B;i++) {
1596       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1597     }
1598 
1599     /* Now we loop on the constraints which need a change of basis */
1600     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1601     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1602     /* Change of basis matrix T computed via QR decomposition of constraints */
1603     if (qr_needed) {
1604       /* dual and primal dofs on a single cc */
1605       PetscInt     dual_dofs,primal_dofs;
1606       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1607       PetscInt     primal_counter;
1608       /* working stuff for GEQRF */
1609       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1610       PetscBLASInt lqr_work;
1611       /* working stuff for UNGQR */
1612       PetscScalar  *gqr_work,lgqr_work_t;
1613       PetscBLASInt lgqr_work;
1614       /* working stuff for TRTRS */
1615       PetscScalar  *trs_rhs;
1616       PetscBLASInt Blas_NRHS;
1617       /* pointers for values insertion into change of basis matrix */
1618       PetscInt     *start_rows,*start_cols;
1619       PetscScalar  *start_vals;
1620       /* working stuff for values insertion */
1621       PetscBT      is_primal;
1622 
1623       /* space to store Q */
1624       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1625       /* first we issue queries for optimal work */
1626       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1627       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1628       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1629       lqr_work = -1;
1630       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1631       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1632       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1633       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1634       lgqr_work = -1;
1635       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1636       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1637       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1638       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1639       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1640       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1641       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1642       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1643       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1644       /* array to store scaling factors for reflectors */
1645       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1646       /* array to store rhs and solution of triangular solver */
1647       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1648       /* array to store whether a node is primal or not */
1649       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1650       for (i=0;i<total_primal_vertices;i++) {
1651         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1652       }
1653 
1654       /* allocating workspace for check */
1655       if (pcbddc->dbg_flag) {
1656         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1657         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1658         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1659       }
1660 
1661       /* loop on constraints and see whether or not they need a change of basis */
1662       /* -> using implicit ordering contained in temp_indices data */
1663       total_counts = pcbddc->n_vertices;
1664       primal_counter = total_counts;
1665       while (total_counts<pcbddc->local_primal_size) {
1666         primal_dofs = 1;
1667         if (PetscBTLookup(change_basis,total_counts)) {
1668           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1669           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1670             primal_dofs++;
1671           }
1672           /* get constraint info */
1673           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1674           dual_dofs = size_of_constraint-primal_dofs;
1675 
1676           /* copy quadrature constraints for change of basis check */
1677           if (pcbddc->dbg_flag) {
1678             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1679             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1680           }
1681 
1682           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1683           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1684 
1685           /* compute QR decomposition of constraints */
1686           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1687           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1688           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1689           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1690           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1691           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1692           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1693 
1694           /* explictly compute R^-T */
1695           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1696           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1697           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1698           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1699           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1700           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1701           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1702           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1703           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1704           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1705 
1706           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1709           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1710           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1711           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1712           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1713           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1714           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1715 
1716           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1717              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1718              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1719           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1720           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1721           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1722           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1723           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1724           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1725           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1726           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1727           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1728           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1729 
1730           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1731           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1732           /* insert cols for primal dofs */
1733           for (j=0;j<primal_dofs;j++) {
1734             start_vals = &qr_basis[j*size_of_constraint];
1735             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1736             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1737           }
1738           /* insert cols for dual dofs */
1739           for (j=0,k=0;j<dual_dofs;k++) {
1740             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1741               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1742               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1743               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1744               j++;
1745             }
1746           }
1747 
1748           /* check change of basis */
1749           if (pcbddc->dbg_flag) {
1750             PetscInt   ii,jj;
1751             PetscBool valid_qr=PETSC_TRUE;
1752             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1753             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1754             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1755             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1756             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1757             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1758             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1759             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1760             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1761             for (jj=0;jj<size_of_constraint;jj++) {
1762               for (ii=0;ii<primal_dofs;ii++) {
1763                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1764                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1765               }
1766             }
1767             if (!valid_qr) {
1768               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1769               for (jj=0;jj<size_of_constraint;jj++) {
1770                 for (ii=0;ii<primal_dofs;ii++) {
1771                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1772                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1773                   }
1774                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1775                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1776                   }
1777                 }
1778               }
1779             } else {
1780               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1781             }
1782           }
1783           /* increment primal counter */
1784           primal_counter += primal_dofs;
1785         } else {
1786           if (pcbddc->dbg_flag) {
1787             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1788           }
1789         }
1790         /* increment constraint counter total_counts */
1791         total_counts += primal_dofs;
1792       }
1793       if (pcbddc->dbg_flag) {
1794         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1795         ierr = PetscFree(work);CHKERRQ(ierr);
1796       }
1797       /* free workspace */
1798       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1799       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1800       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1801       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1802       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1803       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1804     }
1805     /* assembling */
1806     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1807     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1808     /*
1809     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1810     */
1811   }
1812   /* free workspace */
1813   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1814   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1815   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1816   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1817   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1818   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1819   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1820   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1826 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1827 {
1828   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1829   PC_IS       *pcis = (PC_IS*)pc->data;
1830   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1831   PetscInt    bs,ierr,i,vertex_size;
1832   PetscViewer viewer=pcbddc->dbg_viewer;
1833 
1834   PetscFunctionBegin;
1835   /* Init local Graph struct */
1836   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1837 
1838   /* Check validity of the csr graph passed in by the user */
1839   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1840     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1841   }
1842   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1843   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1844     Mat mat_adj;
1845     const PetscInt *xadj,*adjncy;
1846     PetscBool flg_row=PETSC_TRUE;
1847 
1848     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1849     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1850     if (!flg_row) {
1851       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1852     }
1853     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1854     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1855     if (!flg_row) {
1856       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1857     }
1858     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1859   }
1860 
1861   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1862   vertex_size = 1;
1863   if (!pcbddc->n_ISForDofs) {
1864     IS *custom_ISForDofs;
1865 
1866     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1867     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1868     for (i=0;i<bs;i++) {
1869       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1870     }
1871     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1872     /* remove my references to IS objects */
1873     for (i=0;i<bs;i++) {
1874       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1875     }
1876     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1877   } else { /* mat block size as vertex size (used for elasticity) */
1878     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1879   }
1880 
1881   /* Setup of Graph */
1882   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1883 
1884   /* Graph's connected components analysis */
1885   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1886 
1887   /* print some info to stdout */
1888   if (pcbddc->dbg_flag) {
1889     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1896 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1897 {
1898   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1899   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1900   PetscErrorCode ierr;
1901 
1902   PetscFunctionBegin;
1903   n = 0;
1904   vertices = 0;
1905   if (pcbddc->ConstraintMatrix) {
1906     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1907     for (i=0;i<local_primal_size;i++) {
1908       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1909       if (size_of_constraint == 1) n++;
1910       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1911     }
1912     if (vertices_idx) {
1913       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1914       n = 0;
1915       for (i=0;i<local_primal_size;i++) {
1916         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1917         if (size_of_constraint == 1) {
1918           vertices[n++]=row_cmat_indices[0];
1919         }
1920         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1921       }
1922     }
1923   }
1924   *n_vertices = n;
1925   if (vertices_idx) *vertices_idx = vertices;
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1931 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1932 {
1933   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1934   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1935   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1936   PetscBT        touched;
1937   PetscErrorCode ierr;
1938 
1939     /* This function assumes that the number of local constraints per connected component
1940        is not greater than the number of nodes defined for the connected component
1941        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1942   PetscFunctionBegin;
1943   n = 0;
1944   constraints_index = 0;
1945   if (pcbddc->ConstraintMatrix) {
1946     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1947     max_size_of_constraint = 0;
1948     for (i=0;i<local_primal_size;i++) {
1949       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1950       if (size_of_constraint > 1) {
1951         n++;
1952       }
1953       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1954       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1955     }
1956     if (constraints_idx) {
1957       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1958       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1959       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1960       n = 0;
1961       for (i=0;i<local_primal_size;i++) {
1962         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1963         if (size_of_constraint > 1) {
1964           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1965           /* find first untouched local node */
1966           j = 0;
1967           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1968           min_index = row_cmat_global_indices[j];
1969           min_loc = j;
1970           /* search the minimum among nodes not yet touched on the connected component
1971              since there can be more than one constraint on a single cc */
1972           for (j=1;j<size_of_constraint;j++) {
1973             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1974               min_index = row_cmat_global_indices[j];
1975               min_loc = j;
1976             }
1977           }
1978           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1979           constraints_index[n++] = row_cmat_indices[min_loc];
1980         }
1981         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1982       }
1983       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1984       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1985     }
1986   }
1987   *n_constraints = n;
1988   if (constraints_idx) *constraints_idx = constraints_index;
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 /* the next two functions has been adapted from pcis.c */
1993 #undef __FUNCT__
1994 #define __FUNCT__ "PCBDDCApplySchur"
1995 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
1996 {
1997   PetscErrorCode ierr;
1998   PC_IS          *pcis = (PC_IS*)(pc->data);
1999 
2000   PetscFunctionBegin;
2001   if (!vec2_B) { vec2_B = v; }
2002   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2003   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2004   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2005   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2006   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2012 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2013 {
2014   PetscErrorCode ierr;
2015   PC_IS          *pcis = (PC_IS*)(pc->data);
2016 
2017   PetscFunctionBegin;
2018   if (!vec2_B) { vec2_B = v; }
2019   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2020   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2021   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2022   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2023   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "PCBDDCSubsetNumbering"
2029 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2030 {
2031   Vec            local_vec,global_vec;
2032   IS             seqis,paris;
2033   VecScatter     scatter_ctx;
2034   PetscScalar    *array;
2035   PetscInt       *temp_global_dofs;
2036   PetscScalar    globalsum;
2037   PetscInt       i,j,s;
2038   PetscInt       nlocals,first_index,old_index,max_local;
2039   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2040   PetscMPIInt    *dof_sizes,*dof_displs;
2041   PetscBool      first_found;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   /* mpi buffers */
2046   MPI_Comm_size(comm,&size_prec_comm);
2047   MPI_Comm_rank(comm,&rank_prec_comm);
2048   j = ( !rank_prec_comm ? size_prec_comm : 0);
2049   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2050   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2051   /* get maximum size of subset */
2052   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2053   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2054   max_local = 0;
2055   if (n_local_dofs) {
2056     max_local = temp_global_dofs[0];
2057     for (i=1;i<n_local_dofs;i++) {
2058       if (max_local < temp_global_dofs[i] ) {
2059         max_local = temp_global_dofs[i];
2060       }
2061     }
2062   }
2063   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2064   max_global++;
2065   max_local = 0;
2066   if (n_local_dofs) {
2067     max_local = local_dofs[0];
2068     for (i=1;i<n_local_dofs;i++) {
2069       if (max_local < local_dofs[i] ) {
2070         max_local = local_dofs[i];
2071       }
2072     }
2073   }
2074   max_local++;
2075   /* allocate workspace */
2076   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2077   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2078   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2079   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2080   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2081   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2082   /* create scatter */
2083   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2084   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2085   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2086   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2087   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2088   /* init array */
2089   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2090   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2091   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2092   if (local_dofs_mult) {
2093     for (i=0;i<n_local_dofs;i++) {
2094       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2095     }
2096   } else {
2097     for (i=0;i<n_local_dofs;i++) {
2098       array[local_dofs[i]]=1.0;
2099     }
2100   }
2101   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2102   /* scatter into global vec and get total number of global dofs */
2103   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2105   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2106   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2107   /* Fill global_vec with cumulative function for global numbering */
2108   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2109   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2110   nlocals = 0;
2111   first_index = -1;
2112   first_found = PETSC_FALSE;
2113   for (i=0;i<s;i++) {
2114     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2115       first_found = PETSC_TRUE;
2116       first_index = i;
2117     }
2118     nlocals += (PetscInt)PetscRealPart(array[i]);
2119   }
2120   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2121   if (!rank_prec_comm) {
2122     dof_displs[0]=0;
2123     for (i=1;i<size_prec_comm;i++) {
2124       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2125     }
2126   }
2127   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2128   if (first_found) {
2129     array[first_index] += (PetscScalar)nlocals;
2130     old_index = first_index;
2131     for (i=first_index+1;i<s;i++) {
2132       if (PetscRealPart(array[i]) > 0.0) {
2133         array[i] += array[old_index];
2134         old_index = i;
2135       }
2136     }
2137   }
2138   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2139   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2140   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2141   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2142   /* get global ordering of local dofs */
2143   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2144   if (local_dofs_mult) {
2145     for (i=0;i<n_local_dofs;i++) {
2146       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2147     }
2148   } else {
2149     for (i=0;i<n_local_dofs;i++) {
2150       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2151     }
2152   }
2153   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2154   /* free workspace */
2155   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2156   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2157   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2158   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2159   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2160   /* return pointer to global ordering of local dofs */
2161   *global_numbering_subset = temp_global_dofs;
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2167 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2168 {
2169   PetscInt       i,j;
2170   PetscScalar    *alphas;
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   /* this implements stabilized Gram-Schmidt */
2175   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2176   for (i=0;i<n;i++) {
2177     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2178     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2179     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2180   }
2181   ierr = PetscFree(alphas);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatConvert_IS_AIJ"
2188 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2189 {
2190   Mat new_mat;
2191   Mat_IS *matis = (Mat_IS*)(mat->data);
2192   /* info on mat */
2193   PetscInt bs,rows,cols;
2194   PetscInt lrows,lcols;
2195   PetscInt local_rows,local_cols;
2196   PetscMPIInt nsubdomains;
2197   /* preallocation */
2198   Vec vec_dnz,vec_onz;
2199   PetscScalar *my_dnz,*my_onz,*array;
2200   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2201   PetscInt  index_row,index_col,owner;
2202   PetscInt  *local_indices,*global_indices;
2203   /* work */
2204   PetscInt i,j;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   /* CHECKS */
2209   /* get info from mat */
2210   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2211   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2212   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2213   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2214 
2215   /* MAT_INITIAL_MATRIX */
2216   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2217   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2218   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2219   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2220   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2221   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2222 
2223   /*
2224     preallocation
2225   */
2226 
2227   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2228   /*
2229      Some vectors are needed to sum up properly on shared interface dofs.
2230      Note that preallocation is not exact, since it overestimates nonzeros
2231   */
2232 /*
2233   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2234   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2235   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2236   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2237 */
2238   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2239   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2240   /* All processes need to compute entire row ownership */
2241   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2242   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2243   for (i=0;i<nsubdomains;i++) {
2244     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2245       row_ownership[j]=i;
2246     }
2247   }
2248   /* map indices of local mat to global values */
2249   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2250   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2251   for (i=0;i<local_rows;i++) local_indices[i]=i;
2252   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2253   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2254 
2255   /*
2256      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2257      then, they will be summed up properly. This way, preallocation is always sufficient
2258   */
2259   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2260   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2261   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2262   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2263   for (i=0;i<local_rows;i++) {
2264     index_row = global_indices[i];
2265     for (j=i;j<local_rows;j++) {
2266       owner = row_ownership[index_row];
2267       index_col = global_indices[j];
2268       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2269         my_dnz[i] += 1.0;
2270       } else { /* offdiag block */
2271         my_onz[i] += 1.0;
2272       }
2273       /* same as before, interchanging rows and cols */
2274       if (i != j) {
2275         owner = row_ownership[index_col];
2276         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2277           my_dnz[j] += 1.0;
2278         } else {
2279           my_onz[j] += 1.0;
2280         }
2281       }
2282     }
2283   }
2284   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2285   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2286   if (local_rows) { /* multilevel guard */
2287     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2288     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2289   }
2290   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2291   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2292   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2293   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2294   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2295   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2296   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2297 
2298   /* set computed preallocation in dnz and onz */
2299   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2300   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2301   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2302   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2303   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2304   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2305   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2306   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2307 
2308   /* Resize preallocation if overestimated */
2309   for (i=0;i<lrows;i++) {
2310     dnz[i] = PetscMin(dnz[i],lcols);
2311     onz[i] = PetscMin(onz[i],cols-lcols);
2312   }
2313   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2314   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2315 
2316   /*
2317     Set values. Very Basic.
2318   */
2319   for (i=0;i<local_rows;i++) {
2320     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2321     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2322     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2323     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2324     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2325   }
2326   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2327   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2328   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2329 
2330   /* get back mat */
2331   *M = new_mat;
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatISSubassemble_Private"
2337 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2338 {
2339   Mat             subdomain_adj;
2340   IS              new_ranks,ranks_send_to;
2341   MatPartitioning partitioner;
2342   Mat_IS          *matis;
2343   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2344   PetscInt        prank;
2345   PetscMPIInt     size,rank,color;
2346   PetscInt        *xadj,*adjncy,*oldranks;
2347   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2348   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2349   PetscErrorCode  ierr;
2350   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE,use_threshold=PETSC_FALSE;
2351   PetscSubcomm    subcomm;
2352 
2353   PetscFunctionBegin;
2354   ierr = PetscOptionsGetBool(NULL,"-matisc_use_square",&use_square,NULL);CHKERRQ(ierr);
2355   ierr = PetscOptionsGetBool(NULL,"-matisc_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2356   ierr = PetscOptionsGetInt(NULL,"-matisc_threshold",&threshold,&use_threshold);CHKERRQ(ierr);
2357 
2358   /* Get info on mapping */
2359   matis = (Mat_IS*)(mat->data);
2360   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2361   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2362 
2363   /* build local CSR graph of subdomains' connectivity */
2364   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2365   xadj[0] = 0;
2366   xadj[1] = PetscMax(n_neighs-1,0);
2367   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2368   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2369 
2370   if (use_threshold) {
2371     PetscInt* count;
2372     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2373     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2374     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2375       for (j=0;j<n_shared[i];j++) {
2376         count[shared[i][j]] += 1;
2377       }
2378     }
2379     xadj[1] = 0;
2380     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2381       for (j=0;j<n_shared[i];j++) {
2382         if (count[shared[i][j]] < threshold) {
2383           adjncy[xadj[1]] = neighs[i];
2384           adjncy_wgt[xadj[1]] = n_shared[i];
2385           xadj[1]++;
2386           break;
2387         }
2388       }
2389     }
2390     ierr = PetscFree(count);CHKERRQ(ierr);
2391   } else {
2392     if (xadj[1]) {
2393       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2394       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2395     }
2396   }
2397   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2398   if (use_square) {
2399     for (i=0;i<xadj[1];i++) {
2400       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2401     }
2402   }
2403   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2404 
2405   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2406 
2407   /*
2408     Restrict work on active processes only.
2409   */
2410   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2411   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2412   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2413   ierr = PetscMPIIntCast(!n_neighs,&color);CHKERRQ(ierr);
2414   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr);
2415   if (color) {
2416     ierr = PetscFree(xadj);CHKERRQ(ierr);
2417     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2418     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2419   } else {
2420     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2421     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2422     prank = rank;
2423     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2424     for (i=0;i<size;i++) {
2425       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2426     }
2427     for (i=0;i<xadj[1];i++) {
2428       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2429     }
2430     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2431     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2432     n_subdomains = (PetscInt)size;
2433     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2434 
2435     /* Partition */
2436     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2437     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2438     if (use_vwgt) {
2439       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2440       v_wgt[0] = local_size;
2441       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2442     }
2443     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2444     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2445     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2446     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2447     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2448     ierr = ISView(new_ranks,0);CHKERRQ(ierr);
2449 
2450     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2451     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2452     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2453     /* clean up */
2454     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2455     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2456     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2457     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2458   }
2459   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2460 
2461   /* assemble parallel IS for sends */
2462   i = 1;
2463   if (color) i=0;
2464   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2465   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2466 
2467   /* get back IS */
2468   *is_sends = ranks_send_to;
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatISSubassemble"
2476 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2477 {
2478   Mat                    local_mat,new_mat;
2479   Mat_IS                 *matis;
2480   IS                     is_sends_internal;
2481   PetscInt               rows,cols;
2482   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2483   PetscBool              ismatis,isdense;
2484   ISLocalToGlobalMapping l2gmap;
2485   PetscInt*              l2gmap_indices;
2486   const PetscInt*        is_indices;
2487   MatType                new_local_type;
2488   MatTypePrivate         new_local_type_private;
2489   /* buffers */
2490   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2491   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2492   /* MPI */
2493   MPI_Comm               comm;
2494   PetscMPIInt            n_sends,n_recvs,commsize;
2495   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2496   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2497   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2498   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2499   PetscErrorCode         ierr;
2500 
2501   PetscFunctionBegin;
2502   /* checks */
2503   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2504   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2505   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2506   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2507   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2508   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2509   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2510   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2511   PetscValidLogicalCollectiveInt(mat,bs,0);
2512   /* prepare IS for sending if not provided */
2513   if (!is_sends) {
2514     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2515     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2516   } else {
2517     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2518     is_sends_internal = is_sends;
2519   }
2520 
2521   /* get pointer of MATIS data */
2522   matis = (Mat_IS*)mat->data;
2523 
2524   /* get comm */
2525   comm = PetscObjectComm((PetscObject)mat);
2526 
2527   /* compute number of sends */
2528   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2529   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2530 
2531   /* compute number of receives */
2532   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2533   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2534   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2535   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2536   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2537   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2538   ierr = PetscFree(iflags);CHKERRQ(ierr);
2539 
2540   /* prepare send/receive buffers */
2541   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2542   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2543   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2544   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2545 
2546   /* Get data from local mat */
2547   if (!isdense) {
2548     /* TODO: See below some guidelines on how to prepare the local buffers */
2549     /*
2550        send_buffer_vals should contain the raw values of the local matrix
2551        send_buffer_idxs should contain:
2552        - MatType_PRIVATE type
2553        - PetscInt        size_of_l2gmap
2554        - PetscInt        global_row_indices[size_of_l2gmap]
2555        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2556     */
2557   } else {
2558     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2559     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2560     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2561     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2562     send_buffer_idxs[1] = i;
2563     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2564     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2565     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2566     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2567     for (i=0;i<n_sends;i++) {
2568       ilengths_vals[is_indices[i]] = len*len;
2569       ilengths_idxs[is_indices[i]] = len+2;
2570     }
2571   }
2572   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2573   buf_size_idxs = 0;
2574   buf_size_vals = 0;
2575   for (i=0;i<n_recvs;i++) {
2576     buf_size_idxs += (PetscInt)olengths_idxs[i];
2577     buf_size_vals += (PetscInt)olengths_vals[i];
2578   }
2579   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2580   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2581 
2582   /* get new tags for clean communications */
2583   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2584   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2585 
2586   /* allocate for requests */
2587   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2588   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2589   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2590   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2591 
2592   /* communications */
2593   ptr_idxs = recv_buffer_idxs;
2594   ptr_vals = recv_buffer_vals;
2595   for (i=0;i<n_recvs;i++) {
2596     source_dest = onodes[i];
2597     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2598     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2599     ptr_idxs += olengths_idxs[i];
2600     ptr_vals += olengths_vals[i];
2601   }
2602   for (i=0;i<n_sends;i++) {
2603     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2604     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2605     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2606   }
2607   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2608   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2609 
2610   /* assemble new l2g map */
2611   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2612   ptr_idxs = recv_buffer_idxs;
2613   buf_size_idxs = 0;
2614   for (i=0;i<n_recvs;i++) {
2615     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2616     ptr_idxs += olengths_idxs[i];
2617   }
2618   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2619   ptr_idxs = recv_buffer_idxs;
2620   buf_size_idxs = 0;
2621   for (i=0;i<n_recvs;i++) {
2622     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2623     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2624     ptr_idxs += olengths_idxs[i];
2625   }
2626   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2627   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2628   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2629 
2630   /* infer new local matrix type from received local matrices type */
2631   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2632   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2633   new_local_type_private = MATAIJ_PRIVATE;
2634   new_local_type = MATSEQAIJ;
2635   if (n_recvs) {
2636     new_local_type_private = send_buffer_idxs[0];
2637     ptr_idxs = recv_buffer_idxs;
2638     for (i=0;i<n_recvs;i++) {
2639       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2640         new_local_type_private = MATAIJ_PRIVATE;
2641         break;
2642       }
2643       ptr_idxs += olengths_idxs[i];
2644     }
2645     switch (new_local_type_private) {
2646       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2647         new_local_type = MATSEQAIJ;
2648         bs = 1;
2649         break;
2650       case MATAIJ_PRIVATE:
2651         new_local_type = MATSEQAIJ;
2652         bs = 1;
2653         break;
2654       case MATBAIJ_PRIVATE:
2655         new_local_type = MATSEQBAIJ;
2656         break;
2657       case MATSBAIJ_PRIVATE:
2658         new_local_type = MATSEQSBAIJ;
2659         break;
2660       default:
2661         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2662         break;
2663     }
2664   }
2665 
2666   /* create MATIS object */
2667   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2668   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2669   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2670   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2671   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2672   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2673 
2674   /* set values */
2675   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2676   ptr_vals = recv_buffer_vals;
2677   ptr_idxs = recv_buffer_idxs;
2678   for (i=0;i<n_recvs;i++) {
2679     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2680       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2681       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2682       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2683       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2684       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2685     }
2686     ptr_idxs += olengths_idxs[i];
2687     ptr_vals += olengths_vals[i];
2688   }
2689   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2690   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2691   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2692   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2693 
2694   { /* check */
2695     Vec       lvec,rvec;
2696     PetscReal infty_error;
2697 
2698     //ierr = MatView(mat,0);CHKERRQ(ierr);
2699     //ierr = MatView(new_mat,0);CHKERRQ(ierr);
2700     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2701     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2702     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2703     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2704     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2705     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2706     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2707     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2708     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2709   }
2710 
2711   /* free workspace */
2712   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2713   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2714   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2715   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2716   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2717   if (isdense) {
2718     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2719     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2720   } else {
2721     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2722   }
2723   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2724   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2725   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2726   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2727   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2728   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2729   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2730   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2731   ierr = PetscFree(onodes);CHKERRQ(ierr);
2732   /* get back new mat */
2733   *mat_n = new_mat;
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 /* BDDC requires metis 5.0.1 for multilevel */
2738 #if defined(PETSC_HAVE_METIS)
2739 #include "metis.h"
2740 #define MetisInt    idx_t
2741 #define MetisScalar real_t
2742 #endif
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2746 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2747 {
2748   PC_BDDC   *pcbddc = (PC_BDDC*)pc->data;
2749   PC_IS     *pcis = (PC_IS*)pc->data;
2750   Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
2751   ISLocalToGlobalMapping coarse_islg;
2752   IS  coarse_is;
2753 
2754   PetscInt  coarse_size;
2755 
2756   MatNullSpace CoarseNullSpace;
2757 
2758   PetscErrorCode ierr;
2759   PCType  coarse_pc_type;
2760   KSPType coarse_ksp_type;
2761   PC pc_temp;
2762 
2763   PetscInt      im_active,active_procs;
2764   PetscBool     setsym,issym=PETSC_FALSE;
2765   PetscInt      *local_primal_indices=0;
2766 
2767 
2768   PetscFunctionBegin;
2769 
2770   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2771 
2772   if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) {
2773     pcbddc->coarse_problem_type = PARALLEL_BDDC;
2774   }
2775 
2776   /* Assign global numbering to coarse dofs */
2777   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2778 
2779   im_active = 0;
2780   if (pcis->n) im_active = 1;
2781   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2782 
2783   /* adapt coarse problem type */
2784   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2785     if (pcbddc->current_level < pcbddc->max_levels) {
2786       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2787         if (pcbddc->dbg_flag) {
2788           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active %d,ratio %d). Using direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2789          ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2790         }
2791         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2792       }
2793     } else {
2794       if (pcbddc->dbg_flag) {
2795         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2796         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2797       }
2798       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2799     }
2800   }
2801 
2802   /* creates MATIS object for coarse matrix */
2803   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2804   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2805   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2806   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2807   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2808   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2809   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2810 
2811   /* TEST */
2812 
2813   if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2814     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2815     /* just for now */
2816     coarse_pc_type  = PCREDUNDANT;
2817     coarse_ksp_type = KSPPREONLY;
2818   } else {
2819 #if 1
2820     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2821     /* just for now */
2822     coarse_pc_type  = PCBDDC;
2823     coarse_ksp_type = KSPRICHARDSON;
2824   }
2825 #else
2826     Mat coarse_mat_is_subassembled;
2827     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat_is_subassembled);CHKERRQ(ierr);
2828 
2829   MPI_Comm  prec_comm;
2830   MPI_Comm  coarse_comm;
2831   PetscScalar *temp_coarse_mat_vals;
2832   PetscScalar *ins_coarse_mat_vals;
2833   PetscInt    *ins_local_primal_indices;
2834   PetscMPIInt *localsizes2,*localdispl2;
2835   PetscMPIInt size_prec_comm;
2836   PetscMPIInt rank_prec_comm;
2837   PetscMPIInt master_proc=0;
2838   PetscInt    ins_local_primal_size;
2839   PetscMPIInt *ranks_recv=0;
2840   PetscMPIInt count_recv=0;
2841   PetscMPIInt rank_coarse_proc_send_to=-1;
2842   PetscMPIInt coarse_color = MPI_UNDEFINED;
2843   ISLocalToGlobalMapping coarse_ISLG;
2844   PetscInt i,j,k;
2845   PetscInt      offset,offset2;
2846   PetscInt      *dnz;
2847   PetscInt      *replicated_local_primal_indices=0,*local_primal_displacements=0,*local_primal_sizes=0;
2848   PetscInt      replicated_primal_size=0;
2849   ins_local_primal_indices = 0;
2850   ins_coarse_mat_vals      = 0;
2851   localsizes2              = 0;
2852   localdispl2              = 0;
2853   temp_coarse_mat_vals     = 0;
2854   coarse_ISLG              = 0;
2855   IS coarse_IS;
2856   Mat matis_coarse_local_mat;
2857 
2858   /* OLD version from here */
2859 
2860   /* Construct needed data structures for message passing */
2861   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2862   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2863   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2864   j = size_prec_comm;
2865   ierr = PetscMalloc(j*sizeof(*local_primal_sizes),&local_primal_sizes);CHKERRQ(ierr);
2866   ierr = PetscMalloc(j*sizeof(*local_primal_displacements),&local_primal_displacements);CHKERRQ(ierr);
2867   /* Gather local_primal_size information for all processes  */
2868   ierr = MPI_Allgather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2869   replicated_primal_size = 0;
2870   for (i=0; i<j; i++) {
2871     local_primal_displacements[i] = replicated_primal_size ;
2872     replicated_primal_size += local_primal_sizes[i];
2873   }
2874 
2875 #if defined(PETSC_HAVE_METIS)
2876       /* we need additional variables */
2877       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2878       MetisInt    *metis_coarse_subdivision;
2879       MetisInt    options[METIS_NOPTIONS];
2880       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2881       PetscMPIInt procs_jumps_coarse_comm;
2882       PetscMPIInt *coarse_subdivision;
2883       PetscMPIInt *total_count_recv;
2884       PetscMPIInt *total_ranks_recv;
2885       PetscMPIInt *displacements_recv;
2886       PetscMPIInt *my_faces_connectivity;
2887       PetscMPIInt *petsc_faces_adjncy;
2888       MetisInt    *faces_adjncy;
2889       MetisInt    *faces_xadj;
2890       PetscMPIInt *number_of_faces;
2891       PetscMPIInt *faces_displacements;
2892       PetscInt    *array_int;
2893       PetscMPIInt my_faces=0;
2894       PetscMPIInt total_faces=0;
2895       PetscInt    ranks_stretching_ratio;
2896 
2897       /* define some quantities */
2898       coarse_pc_type  = PCBDDC;
2899       coarse_ksp_type = KSPRICHARDSON;
2900 
2901       /* details of coarse decomposition */
2902       n_subdomains = active_procs;
2903       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2904       ranks_stretching_ratio = size_prec_comm/active_procs;
2905       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2906 
2907       /* build CSR graph of subdomains' connectivity */
2908       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2909       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2910       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2911         for (j=0;j<pcis->n_shared[i];j++){
2912           array_int[ pcis->shared[i][j] ]+=1;
2913         }
2914       }
2915       for (i=1;i<pcis->n_neigh;i++){
2916         for (j=0;j<pcis->n_shared[i];j++){
2917           if (array_int[ pcis->shared[i][j] ] > 0 ){
2918             my_faces++;
2919             break;
2920           }
2921         }
2922       }
2923 
2924       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2925       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2926       my_faces=0;
2927       for (i=1;i<pcis->n_neigh;i++){
2928         for (j=0;j<pcis->n_shared[i];j++){
2929           if (array_int[ pcis->shared[i][j] ] > 0 ){
2930             my_faces_connectivity[my_faces]=pcis->neigh[i];
2931             my_faces++;
2932             break;
2933           }
2934         }
2935       }
2936       if (rank_prec_comm == master_proc) {
2937         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2938         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2939         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2940         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2941         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2942       }
2943       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2944       if (rank_prec_comm == master_proc) {
2945         faces_xadj[0]=0;
2946         faces_displacements[0]=0;
2947         j=0;
2948         for (i=1;i<size_prec_comm+1;i++) {
2949           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2950           if (number_of_faces[i-1]) {
2951             j++;
2952             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2953           }
2954         }
2955       }
2956       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2957       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2958       ierr = PetscFree(array_int);CHKERRQ(ierr);
2959       if (rank_prec_comm == master_proc) {
2960         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2961         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2962         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2963         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2964       }
2965 
2966       if ( rank_prec_comm == master_proc ) {
2967 
2968         PetscInt heuristic_for_metis=3;
2969 
2970         ncon=1;
2971         faces_nvtxs=n_subdomains;
2972         /* partition graoh induced by face connectivity */
2973         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2974         ierr = METIS_SetDefaultOptions(options);
2975         /* we need a contiguous partition of the coarse mesh */
2976         options[METIS_OPTION_CONTIG]=1;
2977         options[METIS_OPTION_NITER]=30;
2978         if (pcbddc->coarsening_ratio > 1) {
2979           if (n_subdomains>n_parts*heuristic_for_metis) {
2980             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2981             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2982             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2983             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2984           } else {
2985             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2986             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2987           }
2988         } else {
2989           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2990         }
2991         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2992         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2993         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2994 
2995         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2996         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2997         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2998         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2999       }
3000 
3001       /* Create new communicator for coarse problem splitting the old one */
3002       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
3003         coarse_color=0;              /* for communicator splitting */
3004       }
3005       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
3006          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
3007       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
3008 
3009       if ( coarse_color == 0 ) {
3010         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
3011         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
3012       } else {
3013         rank_coarse_comm = MPI_PROC_NULL;
3014       }
3015 
3016       /* master proc take care of arranging and distributing coarse information */
3017       if (rank_coarse_comm == master_proc) {
3018         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
3019         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
3020         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
3021         /* some initializations */
3022         displacements_recv[0]=0;
3023         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
3024         /* count from how many processes the j-th process of the coarse decomposition will receive data */
3025         for (j=0;j<size_coarse_comm;j++) {
3026           for (i=0;i<size_prec_comm;i++) {
3027           if (coarse_subdivision[i]==j) total_count_recv[j]++;
3028           }
3029         }
3030         /* displacements needed for scatterv of total_ranks_recv */
3031       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
3032 
3033         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
3034         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
3035         for (j=0;j<size_coarse_comm;j++) {
3036           for (i=0;i<size_prec_comm;i++) {
3037             if (coarse_subdivision[i]==j) {
3038               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
3039               total_count_recv[j]+=1;
3040             }
3041           }
3042         }
3043         /*for (j=0;j<size_coarse_comm;j++) {
3044           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
3045           for (i=0;i<total_count_recv[j];i++) {
3046             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
3047           }
3048           printf("\n");
3049         }*/
3050 
3051         /* identify new decomposition in terms of ranks in the old communicator */
3052         for (i=0;i<n_subdomains;i++) {
3053           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
3054         }
3055         /*printf("coarse_subdivision in old end new ranks\n");
3056         for (i=0;i<size_prec_comm;i++)
3057           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
3058             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
3059           } else {
3060             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
3061           }
3062         printf("\n");*/
3063       }
3064 
3065       /* Scatter new decomposition for send details */
3066       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
3067       /* Scatter receiving details to members of coarse decomposition */
3068       if ( coarse_color == 0) {
3069         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
3070         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
3071         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
3072       }
3073 
3074       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
3075       if (coarse_color == 0) {
3076         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
3077         for (i=0;i<count_recv;i++)
3078           printf("%d ",ranks_recv[i]);
3079         printf("\n");
3080       }*/
3081 
3082       if (rank_prec_comm == master_proc) {
3083         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
3084         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
3085         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
3086         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
3087       }
3088 #endif
3089 
3090 
3091           if(pcbddc->coarsening_ratio == 1) {
3092             ins_local_primal_size = pcbddc->local_primal_size;
3093             ins_local_primal_indices = local_primal_indices;
3094             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
3095             /* nonzeros */
3096             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
3097             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
3098             for (i=0;i<ins_local_primal_size;i++) {
3099               dnz[i] = ins_local_primal_size;
3100             }
3101           } else {
3102             PetscMPIInt send_size;
3103             PetscMPIInt *send_buffer;
3104             PetscInt    *aux_ins_indices;
3105             PetscInt    ii,jj;
3106             MPI_Request *requests;
3107 
3108             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
3109             /* reusing local_primal_displacements and replicated_primal_size */
3110             ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
3111             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&local_primal_displacements);CHKERRQ(ierr);
3112             replicated_primal_size = count_recv;
3113             j = 0;
3114             for (i=0;i<count_recv;i++) {
3115               local_primal_displacements[i] = j;
3116               j += local_primal_sizes[ranks_recv[i]];
3117             }
3118             local_primal_displacements[count_recv] = j;
3119             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&replicated_local_primal_indices);CHKERRQ(ierr);
3120             /* allocate auxiliary space */
3121             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
3122             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
3123             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
3124             /* allocate stuffs for message massing */
3125             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
3126             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
3127             /* send indices to be inserted */
3128             for (i=0;i<count_recv;i++) {
3129               send_size = local_primal_sizes[ranks_recv[i]];
3130               ierr = MPI_Irecv(&replicated_local_primal_indices[local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
3131             }
3132             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
3133               send_size = pcbddc->local_primal_size;
3134               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
3135               for (i=0;i<send_size;i++) {
3136                 send_buffer[i]=(PetscMPIInt)local_primal_indices[i];
3137               }
3138               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
3139             }
3140             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3141             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
3142               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
3143             }
3144             j = 0;
3145             for (i=0;i<count_recv;i++) {
3146               ii = local_primal_displacements[i+1]-local_primal_displacements[i];
3147               localsizes2[i] = ii*ii;
3148               localdispl2[i] = j;
3149               j += localsizes2[i];
3150               jj = local_primal_displacements[i];
3151               /* it counts the coarse subdomains sharing the coarse node */
3152               for (k=0;k<ii;k++) {
3153                 aux_ins_indices[replicated_local_primal_indices[jj+k]] += 1;
3154               }
3155             }
3156             /* temp_coarse_mat_vals used to store matrix values to be received */
3157             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
3158             /* evaluate how many values I will insert in coarse mat */
3159             ins_local_primal_size = 0;
3160             for (i=0;i<coarse_size;i++) {
3161               if (aux_ins_indices[i]) {
3162                 ins_local_primal_size++;
3163               }
3164             }
3165             /* evaluate indices I will insert in coarse mat */
3166             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3167             j = 0;
3168             for(i=0;i<coarse_size;i++) {
3169               if(aux_ins_indices[i]) {
3170                 ins_local_primal_indices[j] = i;
3171                 j++;
3172               }
3173             }
3174             /* processes partecipating in coarse problem receive matrix data from their friends */
3175             for (i=0;i<count_recv;i++) {
3176               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
3177             }
3178             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
3179               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
3180               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
3181             }
3182             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3183             /* nonzeros */
3184             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
3185             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
3186             /* use aux_ins_indices to realize a global to local mapping */
3187             j=0;
3188             for(i=0;i<coarse_size;i++){
3189               if(aux_ins_indices[i]==0){
3190                 aux_ins_indices[i]=-1;
3191               } else {
3192                 aux_ins_indices[i]=j;
3193                 j++;
3194               }
3195             }
3196             for (i=0;i<count_recv;i++) {
3197               j = local_primal_sizes[ranks_recv[i]];
3198               for (k=0;k<j;k++) {
3199                 dnz[aux_ins_indices[replicated_local_primal_indices[local_primal_displacements[i]+k]]] += j;
3200               }
3201             }
3202             /* check */
3203             for (i=0;i<ins_local_primal_size;i++) {
3204               if (dnz[i] > ins_local_primal_size) {
3205                 dnz[i] = ins_local_primal_size;
3206               }
3207             }
3208             ierr = PetscFree(requests);CHKERRQ(ierr);
3209             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
3210             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
3211           }
3212           /* create local to global mapping needed by coarse MATIS */
3213           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
3214           coarse_comm = prec_comm;
3215           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
3216           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
3217           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
3218   /* Now create and fill up coarse matrix */
3219 
3220 
3221 
3222       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&coarse_mat);CHKERRQ(ierr);
3223       ierr = MatSetUp(coarse_mat);CHKERRQ(ierr);
3224       ierr = MatISGetLocalMat(coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
3225       ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr);
3226       ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr);
3227       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
3228       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
3229       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3230       /* preallocation */
3231       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
3232       ierr = PetscFree(dnz);CHKERRQ(ierr);
3233       /* insert values */
3234       if (pcbddc->coarsening_ratio == 1) {
3235         ins_coarse_mat_vals = coarse_submat_vals;
3236         ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
3237       } else {
3238         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3239         for (k=0;k<replicated_primal_size;k++) {
3240           offset = local_primal_displacements[k];
3241           offset2 = localdispl2[k];
3242           ins_local_primal_size = local_primal_displacements[k+1]-local_primal_displacements[k];
3243           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3244           for (j=0;j<ins_local_primal_size;j++){
3245             ins_local_primal_indices[j]=(PetscInt)replicated_local_primal_indices[offset+j];
3246           }
3247           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3248           ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3249           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3250         }
3251       }
3252       ins_local_primal_indices = 0;
3253       ins_coarse_mat_vals = 0;
3254     ierr = MatAssemblyBegin(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3255     ierr = MatAssemblyEnd(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256   { /* check */
3257     Vec       _lvec,_rvec;
3258     PetscReal _infty_error;
3259 
3260     //ierr = MatView(mat,0);CHKERRQ(ierr);
3261     //ierr = MatView(new_mat,0);CHKERRQ(ierr);
3262     ierr = MatGetVecs(coarse_mat,&_rvec,&_lvec);CHKERRQ(ierr);
3263     ierr = VecSetRandom(_rvec,NULL);CHKERRQ(ierr);
3264     ierr = MatMult(coarse_mat,_rvec,_lvec);CHKERRQ(ierr);
3265     ierr = VecScale(_lvec,-1.0);CHKERRQ(ierr);
3266     ierr = MatMultAdd(coarse_mat_is_subassembled,_rvec,_lvec,_lvec);CHKERRQ(ierr);
3267     ierr = VecNorm(_lvec,NORM_INFINITY,&_infty_error);CHKERRQ(ierr);
3268     ierr = PetscPrintf(PetscObjectComm((PetscObject)coarse_mat),"Infinity error subassembling against old code %1.6e\n",_infty_error);
3269     ierr = VecDestroy(&_rvec);CHKERRQ(ierr);
3270     ierr = VecDestroy(&_lvec);CHKERRQ(ierr);
3271   }
3272   ierr = MatDestroy(&coarse_mat_is_subassembled);CHKERRQ(ierr);
3273   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
3274   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
3275   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
3276   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
3277   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
3278   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
3279   ierr = PetscFree(local_primal_sizes);CHKERRQ(ierr);
3280   ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
3281   ierr = PetscFree(replicated_local_primal_indices);CHKERRQ(ierr);
3282   }
3283 #endif
3284 
3285   /* propagate symmetry info */
3286   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3287 
3288   /* create local to global scatters */
3289   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3290   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3291 
3292   /* free memory no longer needed */
3293   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3294   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3295   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3296   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3297   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3298 
3299   /* Compute coarse null space */
3300   CoarseNullSpace = 0;
3301   if (pcbddc->NullSpace) {
3302     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3303   }
3304 
3305   /* KSP for coarse problem */
3306   {
3307     PetscBool isbddc=PETSC_FALSE;
3308     PetscInt  max_it;
3309     ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3310     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3311     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3312     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3313     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3314     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3315     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3316     /* Allow user's customization */
3317     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
3318     /* Set Up PC for coarse problem BDDC */
3319     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3320       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3321       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3322       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3323       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
3324       if (CoarseNullSpace) {
3325         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3326       }
3327       if (pcbddc->dbg_flag) {
3328         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"----------------Level %d -> Setting up next level---------------\n",pcbddc->current_level);CHKERRQ(ierr);
3329         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3330       }
3331     } else {
3332       if (CoarseNullSpace) {
3333         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3334       }
3335     }
3336     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3337     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3338 
3339     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3340     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3341     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3342     if (max_it == 1) {
3343       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3344       if (isbddc) {
3345         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
3346       }
3347     }
3348   }
3349   /* Check coarse problem if requested */
3350   if (pcbddc->dbg_flag) {
3351     KSP check_ksp;
3352     PC  check_pc;
3353     Vec check_vec;
3354     PetscInt its;
3355     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
3356     KSPType check_ksp_type;
3357 
3358     /* Create ksp object suitable for extreme eigenvalues' estimation */
3359     ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&check_ksp);CHKERRQ(ierr);
3360     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3361     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3362     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3363       if (issym) check_ksp_type = KSPCG;
3364       else check_ksp_type = KSPGMRES;
3365       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3366     } else {
3367       check_ksp_type = KSPPREONLY;
3368     }
3369     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3370     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3371     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3372     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3373     /* create random vec */
3374     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3375     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3376     if (CoarseNullSpace) {
3377       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3378     }
3379     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3380     /* solve coarse problem */
3381     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3382     if (CoarseNullSpace) {
3383       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3384     }
3385     /* check coarse problem residual error */
3386     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3387     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3388     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3389     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3390     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3391     /* get eigenvalue estimation if inexact */
3392     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3393       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3394       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3395       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",its,check_ksp_type);CHKERRQ(ierr);
3396       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3397     }
3398     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3399     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3400     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3401   }
3402   if (pcbddc->dbg_flag) {
3403     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3404   }
3405   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3406   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 #undef __FUNCT__
3411 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3412 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3413 {
3414   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3415   PC_IS*         pcis = (PC_IS*)pc->data;
3416   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3417   PetscInt       i,j,coarse_size;
3418   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3419   PetscErrorCode ierr;
3420 
3421   PetscFunctionBegin;
3422   /* get indices in local ordering for vertices and constraints */
3423   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3424   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3425   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3426   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3427   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3428   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3429   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3430 
3431   /* Compute global number of coarse dofs */
3432   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3433 
3434   /* check numbering */
3435   if (pcbddc->dbg_flag) {
3436     PetscScalar coarsesum,*array;
3437 
3438     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3439     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3440     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3441     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3442     for (i=0;i<pcbddc->local_primal_size;i++) {
3443       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3444     }
3445     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3446     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3447     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3448     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3449     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3450     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3451     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3452     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3453     for (i=0;i<pcis->n;i++) {
3454       if (array[i] == 1.0) {
3455         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3456       }
3457     }
3458     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3459     for (i=0;i<pcis->n;i++) {
3460       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3461     }
3462     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3463     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3464     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3465     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3466     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3467     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3468     if (pcbddc->dbg_flag > 1) {
3469       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3470       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3471       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3472       for (i=0;i<pcbddc->local_primal_size;i++) {
3473         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3474       }
3475       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3476     }
3477     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3478   }
3479   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3480   /* get back data */
3481   *coarse_size_n = coarse_size;
3482   *local_primal_indices_n = local_primal_indices;
3483   PetscFunctionReturn(0);
3484 }
3485 
3486