xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 9881197aa671a785a78c2af771fc6e38049d9b71)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCSetLevel"
47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
48 {
49   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   pcbddc->current_level=level;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCBDDCResetCustomization"
58 PetscErrorCode PCBDDCResetCustomization(PC pc)
59 {
60   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
61   PetscInt       i;
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
66   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
67   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
68   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
70   for (i=0;i<pcbddc->n_ISForDofs;i++) {
71     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "PCBDDCResetTopography"
79 PetscErrorCode PCBDDCResetTopography(PC pc)
80 {
81   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
86   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
87   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCBDDCResetSolvers"
93 PetscErrorCode PCBDDCResetSolvers(PC pc)
94 {
95   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
101   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
103   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
104   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
105   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
106   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
109   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
110   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
112   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
113   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
114   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
115   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCBDDCCreateWorkVectors"
121 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
122 {
123   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
124   PC_IS          *pcis = (PC_IS*)pc->data;
125   VecType        impVecType;
126   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
131   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
132   local_primal_size = n_constraints+n_vertices;
133   n_R = pcis->n-n_vertices;
134   /* local work vectors */
135   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
136   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
137   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
138   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
139   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
140   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
141   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
142   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
143   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
144   if (n_constraints) {
145     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
146     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
147     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
154 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
155 {
156   PetscErrorCode         ierr;
157   /* pointers to pcis and pcbddc */
158   PC_IS*                 pcis = (PC_IS*)pc->data;
159   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
160   /* submatrices of local problem */
161   Mat                    A_RV,A_VR,A_VV;
162   /* working matrices */
163   Mat                    M1,M2,M3,C_CR;
164   /* working vectors */
165   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
166   /* additional working stuff */
167   IS                     is_aux;
168   ISLocalToGlobalMapping BtoNmap;
169   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
170   const PetscScalar      *array,*row_cmat_values;
171   const PetscInt         *row_cmat_indices,*idx_R_local;
172   PetscInt               *vertices,*idx_V_B,*auxindices;
173   PetscInt               n_vertices,n_constraints,size_of_constraint;
174   PetscInt               i,j,n_R,n_D,n_B;
175   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
176   /* Vector and matrix types */
177   VecType                impVecType;
178   MatType                impMatType;
179   /* some shortcuts to scalars */
180   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
181   /* for debugging purposes */
182   PetscReal              *coarsefunctions_errors,*constraints_errors;
183 
184   PetscFunctionBegin;
185   /* get number of vertices and their local indices */
186   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
187   n_constraints = pcbddc->local_primal_size-n_vertices;
188   /* Set Non-overlapping dimensions */
189   n_B = pcis->n_B; n_D = pcis->n - n_B;
190   n_R = pcis->n-n_vertices;
191 
192   /* Set types for local objects needed by BDDC precondtioner */
193   impMatType = MATSEQDENSE;
194   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
195 
196   /* Allocating some extra storage just to be safe */
197   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
198   for (i=0;i<pcis->n;i++) auxindices[i]=i;
199 
200   /* vertices in boundary numbering */
201   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
202   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
203   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
204   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
205   if (i != n_vertices) {
206     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
207   }
208 
209   /* some work vectors on vertices and/or constraints */
210   if (n_vertices) {
211     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
212     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
213     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
214     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
215   }
216   if (n_constraints) {
217     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
218     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
219   }
220 
221   /* Precompute stuffs needed for preprocessing and application of BDDC*/
222   if (n_constraints) {
223     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
224     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
225     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
226     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
227 
228     /* Extract constraints on R nodes: C_{CR}  */
229     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
230     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
231     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
232 
233     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
234     for (i=0;i<n_constraints;i++) {
235       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
236       /* Get row of constraint matrix in R numbering */
237       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
238       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
239       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
240       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
241       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
242       /* Solve for row of constraint matrix in R numbering */
243       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
244       /* Set values in local_auxmat2 */
245       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
246       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
247       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
248     }
249     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
250     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
252 
253     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
254     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
255     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
256     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
257     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
258     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
259     ierr = MatSetUp(M1);CHKERRQ(ierr);
260     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
261     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
262     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
263     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
264     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
265     ierr = MatDestroy(&M2);CHKERRQ(ierr);
266     ierr = MatDestroy(&M3);CHKERRQ(ierr);
267     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
268     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
269   }
270 
271   /* Get submatrices from subdomain matrix */
272   if (n_vertices) {
273     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
274     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
275     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
277     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
278   }
279 
280   /* Matrix of coarse basis functions (local) */
281   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
282   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
283   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
284   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
285   if (pcbddc->switch_static || pcbddc->dbg_flag) {
286     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
287     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
288     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
289     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
290   }
291 
292   if (pcbddc->dbg_flag) {
293     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
294     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
295     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
296   }
297   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
298   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
299 
300   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
301 
302   /* vertices */
303   for (i=0;i<n_vertices;i++) {
304     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
305     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
306     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
307     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
308     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
309     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
310     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
311     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
312     if (n_constraints) {
313       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
314       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
315       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
316     }
317     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
318     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
319 
320     /* Set values in coarse basis function and subdomain part of coarse_mat */
321     /* coarse basis functions */
322     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
323     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
324     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
325     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
326     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
327     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
328     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
329     if (pcbddc->switch_static || pcbddc->dbg_flag) {
330       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
331       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
333       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
334       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
335     }
336     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
337     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
338     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
339     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
340     if (n_constraints) {
341       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
342       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
343       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
344     }
345 
346     /* check */
347     if (pcbddc->dbg_flag) {
348       /* assemble subdomain vector on local nodes */
349       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
350       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
351       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
352       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
353       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
354       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
355       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
356       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
357       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
358       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
359       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
360       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
361       if (n_constraints) {
362         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
363         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
364         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
365       }
366       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
367       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
368       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
369       /* check saddle point solution */
370       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
371       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
372       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
373       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
374       /* shift by the identity matrix */
375       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
379     }
380   }
381 
382   /* constraints */
383   for (i=0;i<n_constraints;i++) {
384     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
385     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
386     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
387     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
388     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
389     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
390     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
391     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
392     if (n_vertices) {
393       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
394     }
395     /* Set values in coarse basis function and subdomain part of coarse_mat */
396     /* coarse basis functions */
397     j = i+n_vertices; /* don't touch this! */
398     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
399     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
402     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
403     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
404     if (pcbddc->switch_static || pcbddc->dbg_flag) {
405       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
406       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
408       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
409       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410     }
411     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
412     if (n_vertices) {
413       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
414       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
416     }
417     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
418     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
419     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
420 
421     if (pcbddc->dbg_flag) {
422       /* assemble subdomain vector on nodes */
423       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
424       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
427       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
428       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
429       /* assemble subdomain vector of lagrange multipliers */
430       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
431       if (n_vertices) {
432         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
433         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
434         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
435       }
436       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
437       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
438       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
439       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
442       /* check saddle point solution */
443       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
446       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
447       /* shift by the identity matrix */
448       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
452     }
453   }
454   /* call assembling routines for local coarse basis */
455   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   if (pcbddc->switch_static || pcbddc->dbg_flag) {
458     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460   }
461 
462   /* compute other basis functions for non-symmetric problems */
463   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
464   if (!setsym || (setsym && !issym)) {
465     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
466     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
467     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
468     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
469     if (pcbddc->switch_static || pcbddc->dbg_flag) {
470       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
471       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
472       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
473       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
474     }
475     for (i=0;i<pcbddc->local_primal_size;i++) {
476       if (n_constraints) {
477         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
478         for (j=0;j<n_constraints;j++) {
479           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
480         }
481         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
482         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
483       }
484       if (i<n_vertices) {
485         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
486         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
487         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
488         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
489         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
490         if (n_constraints) {
491           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
492         }
493       } else {
494         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
495       }
496       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
497       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
498       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
499       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
500       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
501       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
502       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
503       if (i<n_vertices) {
504         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
505       }
506       if (pcbddc->switch_static || pcbddc->dbg_flag) {
507         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
508         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
510         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
511         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
512       }
513 
514       if (pcbddc->dbg_flag) {
515         /* assemble subdomain vector on nodes */
516         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
517         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
518         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
519         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
520         if (i<n_vertices) {
521           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
522         }
523         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
524         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
525         /* assemble subdomain vector of lagrange multipliers */
526         for (j=0;j<pcbddc->local_primal_size;j++) {
527           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
528         }
529         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
530         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
531         /* check saddle point solution */
532         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
533         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
534         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
535         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
536         /* shift by the identity matrix */
537         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
538         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
539         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
541       }
542     }
543     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
544     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     if (pcbddc->switch_static || pcbddc->dbg_flag) {
546       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548     }
549   }
550   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
551   /* Checking coarse_sub_mat and coarse basis functios */
552   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
553   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
554   if (pcbddc->dbg_flag) {
555     Mat         coarse_sub_mat;
556     Mat         AUXMAT,TM1,TM2,TM3,TM4;
557     Mat         coarse_phi_D,coarse_phi_B;
558     Mat         coarse_psi_D,coarse_psi_B;
559     Mat         A_II,A_BB,A_IB,A_BI;
560     MatType     checkmattype=MATSEQAIJ;
561     PetscReal   real_value;
562 
563     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
564     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
565     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
566     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
567     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
568     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
569     if (pcbddc->coarse_psi_B) {
570       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
571       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
572     }
573     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
574 
575     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
576     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
577     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
578     if (pcbddc->coarse_psi_B) {
579       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
580       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
581       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
582       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
583       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
584       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
585       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
586       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
587       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
588       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
589       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
590       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
591     } else {
592       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
593       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
594       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
595       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
596       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
597       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
598       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
599       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
600     }
601     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
602     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
604     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
605     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
606     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
607     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
608     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
611     for (i=0;i<pcbddc->local_primal_size;i++) {
612       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
613     }
614     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
615     for (i=0;i<pcbddc->local_primal_size;i++) {
616       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
617     }
618     if (pcbddc->coarse_psi_B) {
619       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
620       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
621         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
622       }
623       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
624       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
625         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
626       }
627     }
628     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
629     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
630     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
631     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
632     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
633     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
634     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
635     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
636     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
637     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
638     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
639     if (pcbddc->coarse_psi_B) {
640       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
641       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
642     }
643     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
644     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
645     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
646     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
647   }
648   /* free memory */
649   if (n_vertices) {
650     ierr = PetscFree(vertices);CHKERRQ(ierr);
651     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
652     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
653     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
654     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
655     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
656   }
657   if (n_constraints) {
658     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
659     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
660     ierr = MatDestroy(&M1);CHKERRQ(ierr);
661     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
662   }
663   ierr = PetscFree(auxindices);CHKERRQ(ierr);
664   /* get back data */
665   *coarse_submat_vals_n = coarse_submat_vals;
666   PetscFunctionReturn(0);
667 }
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
671 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
672 {
673   PC_IS*            pcis = (PC_IS*)(pc->data);
674   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
675   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
676   /* manage repeated solves */
677   MatReuse          reuse;
678   MatStructure      matstruct;
679   PetscErrorCode    ierr;
680 
681   PetscFunctionBegin;
682   /* get mat flags */
683   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
684   reuse = MAT_INITIAL_MATRIX;
685   if (pc->setupcalled) {
686     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
687     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
688     if (matstruct == SAME_NONZERO_PATTERN) {
689       reuse = MAT_REUSE_MATRIX;
690     } else {
691       reuse = MAT_INITIAL_MATRIX;
692     }
693   }
694   if (reuse == MAT_INITIAL_MATRIX) {
695     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
696     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
698     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
699     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
700   }
701 
702   /* transform local matrices if needed */
703   if (pcbddc->use_change_of_basis) {
704     Mat         change_mat_all;
705     PetscScalar *row_cmat_values;
706     PetscInt    *row_cmat_indices;
707     PetscInt    *nnz,*is_indices,*temp_indices;
708     PetscInt    i,j,k,n_D,n_B;
709 
710     /* Get Non-overlapping dimensions */
711     n_B = pcis->n_B;
712     n_D = pcis->n-n_B;
713 
714     /* compute nonzero structure of change of basis on all local nodes */
715     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
716     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
718     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
719     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
720     k=1;
721     for (i=0;i<n_B;i++) {
722       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
723       nnz[is_indices[i]]=j;
724       if (k < j) k = j;
725       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
726     }
727     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     /* assemble change of basis matrix on the whole set of local dofs */
729     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
730     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
731     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
732     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
733     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
734     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
735     for (i=0;i<n_D;i++) {
736       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
737     }
738     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     for (i=0;i<n_B;i++) {
741       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
742       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
743       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
744       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
745     }
746     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
747     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
748     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
749     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
750     if (i==1) {
751       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
752     } else {
753       Mat work_mat;
754       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
755       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
756       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
757     }
758     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
759     ierr = PetscFree(nnz);CHKERRQ(ierr);
760     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
761   } else {
762     /* without change of basis, the local matrix is unchanged */
763     if (!pcbddc->local_mat) {
764       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
765       pcbddc->local_mat = matis->A;
766     }
767   }
768 
769   /* get submatrices */
770   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
772   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
773   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 #undef __FUNCT__
778 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
779 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
780 {
781   PC_IS*         pcis = (PC_IS*)(pc->data);
782   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
783   IS             is_aux1,is_aux2;
784   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
785   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
786   PetscBT        bitmask;
787   PetscErrorCode ierr;
788 
789   PetscFunctionBegin;
790   /* Set Non-overlapping dimensions */
791   n_B = pcis->n_B; n_D = pcis->n - n_B;
792   /* get vertex indices from constraint matrix */
793   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
794   /* Set number of constraints */
795   n_constraints = pcbddc->local_primal_size-n_vertices;
796   /* create auxiliary bitmask */
797   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
798   for (i=0;i<n_vertices;i++) {
799     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
800   }
801   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
802   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
803   for (i=0, n_R=0; i<pcis->n; i++) {
804     if (!PetscBTLookup(bitmask,i)) {
805       idx_R_local[n_R] = i;
806       n_R++;
807     }
808   }
809   ierr = PetscFree(vertices);CHKERRQ(ierr);
810   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
811 
812   /* print some info if requested */
813   if (pcbddc->dbg_flag) {
814     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
815     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
816     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
817     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
818     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
820     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
821   }
822 
823   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
824   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
825   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
826   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
827   for (i=0; i<n_D; i++) {
828     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
829   }
830   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
831   for (i=0, j=0; i<n_R; i++) {
832     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
833       aux_array1[j++] = i;
834     }
835   }
836   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
837   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
838   for (i=0, j=0; i<n_B; i++) {
839     if (!PetscBTLookup(bitmask,is_indices[i])) {
840       aux_array2[j++] = i;
841     }
842   }
843   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
844   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
845   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
846   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
847   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
848 
849   if (pcbddc->switch_static || pcbddc->dbg_flag) {
850     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
851     for (i=0, j=0; i<n_R; i++) {
852       if (PetscBTLookup(bitmask,idx_R_local[i])) {
853         aux_array1[j++] = i;
854       }
855     }
856     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
857     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
858     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
859   }
860   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
866 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
867 {
868   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
869 
870   PetscFunctionBegin;
871   pcbddc->use_exact_dirichlet=use;
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
877 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
878 {
879   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
880   PC_IS          *pcis = (PC_IS*)pc->data;
881   PC             pc_temp;
882   Mat            A_RR;
883   Vec            vec1,vec2,vec3;
884   MatStructure   matstruct;
885   PetscScalar    m_one = -1.0;
886   PetscReal      value;
887   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
888   PetscErrorCode ierr;
889 
890   PetscFunctionBegin;
891   /* Creating PC contexts for local Dirichlet and Neumann problems */
892   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
893 
894   /* DIRICHLET PROBLEM */
895   /* Matrix for Dirichlet problem is pcis->A_II */
896   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
897   if (!pcbddc->ksp_D) { /* create object if not yet build */
898     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
899     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
900     /* default */
901     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
902     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
903     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
904     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
905     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
906   }
907   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
908   /* Allow user's customization */
909   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
910   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
911   if (!n_D) {
912     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
913     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
914   }
915   /* Set Up KSP for Dirichlet problem of BDDC */
916   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
917   /* set ksp_D into pcis data */
918   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
919   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
920   pcis->ksp_D = pcbddc->ksp_D;
921 
922   /* NEUMANN PROBLEM */
923   /* Matrix for Neumann problem is A_RR -> we need to create it */
924   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
925   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
926   if (!pcbddc->ksp_R) { /* create object if not yet build */
927     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
928     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
929     /* default */
930     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
931     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
932     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
933     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
934     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
935   }
936   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
937   /* Allow user's customization */
938   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
939   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
940   if (!n_R) {
941     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
942     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
943   }
944   /* Set Up KSP for Neumann problem of BDDC */
945   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
946 
947   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
948 
949   /* Dirichlet */
950   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
951   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
952   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
953   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
954   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
955   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
956   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
957   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
958   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
959   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
960   /* need to be adapted? */
961   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
962   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
963   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
964   /* print info */
965   if (pcbddc->dbg_flag) {
966     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
967     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
969     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
970     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
971   }
972   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
973     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
974   }
975 
976   /* Neumann */
977   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
978   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
979   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
980   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
981   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
982   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
983   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
984   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
985   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
986   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
987   /* need to be adapted? */
988   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
989   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
990   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
991   /* print info */
992   if (pcbddc->dbg_flag) {
993     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
994     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
995   }
996   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
997     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
998   }
999 
1000   /* free Neumann problem's matrix */
1001   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1007 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1008 {
1009   PetscErrorCode ierr;
1010   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1011 
1012   PetscFunctionBegin;
1013   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1014   if (pcbddc->local_auxmat1) {
1015     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1016     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1023 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1024 {
1025   PetscErrorCode ierr;
1026   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1027   PC_IS*            pcis = (PC_IS*)  (pc->data);
1028   const PetscScalar zero = 0.0;
1029 
1030   PetscFunctionBegin;
1031   /* Application of PHI^T (or PSI^T)  */
1032   if (pcbddc->coarse_psi_B) {
1033     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1034     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1035   } else {
1036     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1037     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1038   }
1039   /* Scatter data of coarse_rhs */
1040   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1041   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042 
1043   /* Local solution on R nodes */
1044   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1045   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1046   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   if (pcbddc->switch_static) {
1048     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050   }
1051   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1052   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1053   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055   if (pcbddc->switch_static) {
1056     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1057     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058   }
1059 
1060   /* Coarse solution */
1061   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1063     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1064   }
1065   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1066   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067 
1068   /* Sum contributions from two levels */
1069   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1070   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1076 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1077 {
1078   PetscErrorCode ierr;
1079   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1080 
1081   PetscFunctionBegin;
1082   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1088 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1089 {
1090   PetscErrorCode ierr;
1091   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1092 
1093   PetscFunctionBegin;
1094   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 /* uncomment for testing purposes */
1099 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1100 #undef __FUNCT__
1101 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1102 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1103 {
1104   PetscErrorCode    ierr;
1105   PC_IS*            pcis = (PC_IS*)(pc->data);
1106   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1107   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1108   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1109   MatType           impMatType=MATSEQAIJ;
1110   /* one and zero */
1111   PetscScalar       one=1.0,zero=0.0;
1112   /* space to store constraints and their local indices */
1113   PetscScalar       *temp_quadrature_constraint;
1114   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1115   /* iterators */
1116   PetscInt          i,j,k,total_counts,temp_start_ptr;
1117   /* stuff to store connected components stored in pcbddc->mat_graph */
1118   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1119   PetscInt          n_ISForFaces,n_ISForEdges;
1120   /* near null space stuff */
1121   MatNullSpace      nearnullsp;
1122   const Vec         *nearnullvecs;
1123   Vec               *localnearnullsp;
1124   PetscBool         nnsp_has_cnst;
1125   PetscInt          nnsp_size;
1126   PetscScalar       *array;
1127   /* BLAS integers */
1128   PetscBLASInt      lwork,lierr;
1129   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1130   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1131   /* LAPACK working arrays for SVD or POD */
1132   PetscBool         skip_lapack;
1133   PetscScalar       *work;
1134   PetscReal         *singular_vals;
1135 #if defined(PETSC_USE_COMPLEX)
1136   PetscReal         *rwork;
1137 #endif
1138 #if defined(PETSC_MISSING_LAPACK_GESVD)
1139   PetscBLASInt      Blas_one_2=1;
1140   PetscScalar       *temp_basis,*correlation_mat;
1141 #else
1142   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1143   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1144 #endif
1145   /* change of basis */
1146   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1147   PetscBool         boolforchange;
1148   PetscBT           touched,change_basis;
1149   /* auxiliary stuff */
1150   PetscInt          *nnz,*is_indices,*local_to_B;
1151   /* some quantities */
1152   PetscInt          n_vertices,total_primal_vertices;
1153   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1154 
1155 
1156   PetscFunctionBegin;
1157   /* Get index sets for faces, edges and vertices from graph */
1158   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1159     pcbddc->use_vertices = PETSC_TRUE;
1160   }
1161   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1162   /* print some info */
1163   if (pcbddc->dbg_flag) {
1164     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1165     i = 0;
1166     if (ISForVertices) {
1167       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1168     }
1169     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1170     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1171     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1172     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1173   }
1174   /* check if near null space is attached to global mat */
1175   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1176   if (nearnullsp) {
1177     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1178   } else { /* if near null space is not provided BDDC uses constants by default */
1179     nnsp_size = 0;
1180     nnsp_has_cnst = PETSC_TRUE;
1181   }
1182   /* get max number of constraints on a single cc */
1183   max_constraints = nnsp_size;
1184   if (nnsp_has_cnst) max_constraints++;
1185 
1186   /*
1187        Evaluate maximum storage size needed by the procedure
1188        - temp_indices will contain start index of each constraint stored as follows
1189        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1190        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1191        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1192                                                                                                                                                          */
1193   total_counts = n_ISForFaces+n_ISForEdges;
1194   total_counts *= max_constraints;
1195   n_vertices = 0;
1196   if (ISForVertices) {
1197     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1198   }
1199   total_counts += n_vertices;
1200   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1201   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1202   total_counts = 0;
1203   max_size_of_constraint = 0;
1204   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1205     if (i<n_ISForEdges) {
1206       used_IS = &ISForEdges[i];
1207     } else {
1208       used_IS = &ISForFaces[i-n_ISForEdges];
1209     }
1210     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1211     total_counts += j;
1212     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1213   }
1214   total_counts *= max_constraints;
1215   total_counts += n_vertices;
1216   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1217   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1218   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1219   /* local to boundary numbering */
1220   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1221   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1222   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1223   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1224   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1225   /* get local part of global near null space vectors */
1226   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1227   for (k=0;k<nnsp_size;k++) {
1228     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1229     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231   }
1232 
1233   /* whether or not to skip lapack calls */
1234   skip_lapack = PETSC_TRUE;
1235   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1236 
1237   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1238   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1239     PetscScalar temp_work;
1240 #if defined(PETSC_MISSING_LAPACK_GESVD)
1241     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1242     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1243     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1244     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1245 #if defined(PETSC_USE_COMPLEX)
1246     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1247 #endif
1248     /* now we evaluate the optimal workspace using query with lwork=-1 */
1249     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1250     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1251     lwork = -1;
1252     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1253 #if !defined(PETSC_USE_COMPLEX)
1254     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1255 #else
1256     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1257 #endif
1258     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1259     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1260 #else /* on missing GESVD */
1261     /* SVD */
1262     PetscInt max_n,min_n;
1263     max_n = max_size_of_constraint;
1264     min_n = max_constraints;
1265     if (max_size_of_constraint < max_constraints) {
1266       min_n = max_size_of_constraint;
1267       max_n = max_constraints;
1268     }
1269     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1270 #if defined(PETSC_USE_COMPLEX)
1271     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1272 #endif
1273     /* now we evaluate the optimal workspace using query with lwork=-1 */
1274     lwork = -1;
1275     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1276     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1277     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1278     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1279 #if !defined(PETSC_USE_COMPLEX)
1280     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1281 #else
1282     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1283 #endif
1284     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1285     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1286 #endif /* on missing GESVD */
1287     /* Allocate optimal workspace */
1288     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1289     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1290   }
1291   /* Now we can loop on constraining sets */
1292   total_counts = 0;
1293   temp_indices[0] = 0;
1294   /* vertices */
1295   if (ISForVertices) {
1296     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1297     if (nnsp_has_cnst) { /* consider all vertices */
1298       for (i=0;i<n_vertices;i++) {
1299         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1300         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1301         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1302         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1303         total_counts++;
1304       }
1305     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1306       PetscBool used_vertex;
1307       for (i=0;i<n_vertices;i++) {
1308         used_vertex = PETSC_FALSE;
1309         k = 0;
1310         while (!used_vertex && k<nnsp_size) {
1311           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1312           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1313             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1314             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1315             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1316             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1317             total_counts++;
1318             used_vertex = PETSC_TRUE;
1319           }
1320           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1321           k++;
1322         }
1323       }
1324     }
1325     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1326     n_vertices = total_counts;
1327   }
1328 
1329   /* edges and faces */
1330   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1331     if (i<n_ISForEdges) {
1332       used_IS = &ISForEdges[i];
1333       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1334     } else {
1335       used_IS = &ISForFaces[i-n_ISForEdges];
1336       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1337     }
1338     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1339     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1340     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1341     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1342     /* change of basis should not be performed on local periodic nodes */
1343     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1344     if (nnsp_has_cnst) {
1345       PetscScalar quad_value;
1346       temp_constraints++;
1347       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1348       for (j=0;j<size_of_constraint;j++) {
1349         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1350         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1351         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1352       }
1353       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1354       if (boolforchange) {
1355         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1356       }
1357       total_counts++;
1358     }
1359     for (k=0;k<nnsp_size;k++) {
1360       PetscReal real_value;
1361       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1362       for (j=0;j<size_of_constraint;j++) {
1363         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1364         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1365         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1366       }
1367       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1368       /* check if array is null on the connected component */
1369       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1370       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1371       if (real_value > 0.0) { /* keep indices and values */
1372         temp_constraints++;
1373         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1374         if (boolforchange) {
1375           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1376         }
1377         total_counts++;
1378       }
1379     }
1380     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1381     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1382     if (!pcbddc->use_nnsp_true) {
1383       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1384 
1385 #if defined(PETSC_MISSING_LAPACK_GESVD)
1386       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1387          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1388          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1389             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1390             from that computed using LAPACKgesvd
1391          -> This is due to a different computation of eigenvectors in LAPACKheev
1392          -> The quality of the POD-computed basis will be the same */
1393       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1394       /* Store upper triangular part of correlation matrix */
1395       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1396       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1397       for (j=0;j<temp_constraints;j++) {
1398         for (k=0;k<j+1;k++) {
1399           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1400         }
1401       }
1402       /* compute eigenvalues and eigenvectors of correlation matrix */
1403       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1404       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1405 #if !defined(PETSC_USE_COMPLEX)
1406       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1407 #else
1408       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1409 #endif
1410       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1411       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1412       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1413       j=0;
1414       while (j < temp_constraints && singular_vals[j] < tol) j++;
1415       total_counts=total_counts-j;
1416       /* scale and copy POD basis into used quadrature memory */
1417       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1418       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1419       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1420       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1421       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1422       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1423       if (j<temp_constraints) {
1424         PetscInt ii;
1425         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1426         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1427         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1428         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1429         for (k=0;k<temp_constraints-j;k++) {
1430           for (ii=0;ii<size_of_constraint;ii++) {
1431             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1432           }
1433         }
1434       }
1435 #else  /* on missing GESVD */
1436       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1437       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1438       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1439       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1440 #if !defined(PETSC_USE_COMPLEX)
1441       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1442 #else
1443       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1444 #endif
1445       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1446       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1447       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1448       k = temp_constraints;
1449       if (k > size_of_constraint) k = size_of_constraint;
1450       j = 0;
1451       while (j < k && singular_vals[k-j-1] < tol) j++;
1452       total_counts = total_counts-temp_constraints+k-j;
1453 #endif /* on missing GESVD */
1454     }
1455   }
1456   /* free index sets of faces, edges and vertices */
1457   for (i=0;i<n_ISForFaces;i++) {
1458     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1459   }
1460   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1461   for (i=0;i<n_ISForEdges;i++) {
1462     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1463   }
1464   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1465   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1466 
1467   /* free workspace */
1468   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1469     ierr = PetscFree(work);CHKERRQ(ierr);
1470 #if defined(PETSC_USE_COMPLEX)
1471     ierr = PetscFree(rwork);CHKERRQ(ierr);
1472 #endif
1473     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1474 #if defined(PETSC_MISSING_LAPACK_GESVD)
1475     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1476     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1477 #endif
1478   }
1479   for (k=0;k<nnsp_size;k++) {
1480     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1481   }
1482   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1483 
1484   /* set quantities in pcbddc data structure */
1485   /* n_vertices defines the number of subdomain corners in the primal space */
1486   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1487   pcbddc->local_primal_size = total_counts;
1488   pcbddc->n_vertices = n_vertices;
1489   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1490 
1491   /* Create constraint matrix */
1492   /* The constraint matrix is used to compute the l2g map of primal dofs */
1493   /* so we need to set it up properly either with or without change of basis */
1494   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1495   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1496   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1497   /* array to compute a local numbering of constraints : vertices first then constraints */
1498   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1499   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1500   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1501   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1502   /* auxiliary stuff for basis change */
1503   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1504   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1505 
1506   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1507   total_primal_vertices=0;
1508   for (i=0;i<pcbddc->local_primal_size;i++) {
1509     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1510     if (size_of_constraint == 1) {
1511       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1512       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1513       aux_primal_minloc[total_primal_vertices]=0;
1514       total_primal_vertices++;
1515     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1516       PetscInt min_loc,min_index;
1517       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1518       /* find first untouched local node */
1519       k = 0;
1520       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1521       min_index = global_indices[k];
1522       min_loc = k;
1523       /* search the minimum among global nodes already untouched on the cc */
1524       for (k=1;k<size_of_constraint;k++) {
1525         /* there can be more than one constraint on a single connected component */
1526         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1527           min_index = global_indices[k];
1528           min_loc = k;
1529         }
1530       }
1531       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1532       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1533       aux_primal_minloc[total_primal_vertices]=min_loc;
1534       total_primal_vertices++;
1535     }
1536   }
1537   /* free workspace */
1538   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1539   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1540   /* permute indices in order to have a sorted set of vertices */
1541   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1542 
1543   /* nonzero structure of constraint matrix */
1544   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1545   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1546   j=total_primal_vertices;
1547   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1548     if (!PetscBTLookup(change_basis,i)) {
1549       nnz[j]=temp_indices[i+1]-temp_indices[i];
1550       j++;
1551     }
1552   }
1553   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1554   ierr = PetscFree(nnz);CHKERRQ(ierr);
1555   /* set values in constraint matrix */
1556   for (i=0;i<total_primal_vertices;i++) {
1557     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1558   }
1559   total_counts = total_primal_vertices;
1560   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1561     if (!PetscBTLookup(change_basis,i)) {
1562       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1563       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1564       total_counts++;
1565     }
1566   }
1567   /* assembling */
1568   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1569   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1570   /*
1571   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1572   */
1573   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1574   if (pcbddc->use_change_of_basis) {
1575     PetscBool qr_needed = PETSC_FALSE;
1576     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1577     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1578     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1579     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1580     /* work arrays */
1581     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1582     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1583     /* nonzeros per row */
1584     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1585       if (PetscBTLookup(change_basis,i)) {
1586         qr_needed = PETSC_TRUE;
1587         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1588         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1589       }
1590     }
1591     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1592     ierr = PetscFree(nnz);CHKERRQ(ierr);
1593     /* Set initial identity in the matrix */
1594     for (i=0;i<pcis->n_B;i++) {
1595       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1596     }
1597 
1598     /* Now we loop on the constraints which need a change of basis */
1599     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1600     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1601     /* Change of basis matrix T computed via QR decomposition of constraints */
1602     if (qr_needed) {
1603       /* dual and primal dofs on a single cc */
1604       PetscInt     dual_dofs,primal_dofs;
1605       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1606       PetscInt     primal_counter;
1607       /* working stuff for GEQRF */
1608       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1609       PetscBLASInt lqr_work;
1610       /* working stuff for UNGQR */
1611       PetscScalar  *gqr_work,lgqr_work_t;
1612       PetscBLASInt lgqr_work;
1613       /* working stuff for TRTRS */
1614       PetscScalar  *trs_rhs;
1615       PetscBLASInt Blas_NRHS;
1616       /* pointers for values insertion into change of basis matrix */
1617       PetscInt     *start_rows,*start_cols;
1618       PetscScalar  *start_vals;
1619       /* working stuff for values insertion */
1620       PetscBT      is_primal;
1621 
1622       /* space to store Q */
1623       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1624       /* first we issue queries for optimal work */
1625       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1626       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1627       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1628       lqr_work = -1;
1629       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1630       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1631       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1632       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1633       lgqr_work = -1;
1634       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1635       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1636       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1637       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1638       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1639       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1640       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1641       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1642       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1643       /* array to store scaling factors for reflectors */
1644       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1645       /* array to store rhs and solution of triangular solver */
1646       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1647       /* array to store whether a node is primal or not */
1648       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1649       for (i=0;i<total_primal_vertices;i++) {
1650         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1651       }
1652 
1653       /* allocating workspace for check */
1654       if (pcbddc->dbg_flag) {
1655         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1656         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1657         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1658       }
1659 
1660       /* loop on constraints and see whether or not they need a change of basis */
1661       /* -> using implicit ordering contained in temp_indices data */
1662       total_counts = pcbddc->n_vertices;
1663       primal_counter = total_counts;
1664       while (total_counts<pcbddc->local_primal_size) {
1665         primal_dofs = 1;
1666         if (PetscBTLookup(change_basis,total_counts)) {
1667           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1668           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1669             primal_dofs++;
1670           }
1671           /* get constraint info */
1672           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1673           dual_dofs = size_of_constraint-primal_dofs;
1674 
1675           /* copy quadrature constraints for change of basis check */
1676           if (pcbddc->dbg_flag) {
1677             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1678             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1679           }
1680 
1681           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1682           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1683 
1684           /* compute QR decomposition of constraints */
1685           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1686           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1687           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1688           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1689           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1690           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1691           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1692 
1693           /* explictly compute R^-T */
1694           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1695           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1696           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1697           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1698           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1699           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1700           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1701           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1702           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1703           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1704 
1705           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1706           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1709           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1710           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1711           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1712           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1713           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1714 
1715           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1716              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1717              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1718           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1719           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1720           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1721           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1722           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1723           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1724           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1725           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1726           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1727           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1728 
1729           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1730           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1731           /* insert cols for primal dofs */
1732           for (j=0;j<primal_dofs;j++) {
1733             start_vals = &qr_basis[j*size_of_constraint];
1734             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1735             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1736           }
1737           /* insert cols for dual dofs */
1738           for (j=0,k=0;j<dual_dofs;k++) {
1739             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1740               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1741               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1742               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1743               j++;
1744             }
1745           }
1746 
1747           /* check change of basis */
1748           if (pcbddc->dbg_flag) {
1749             PetscInt   ii,jj;
1750             PetscBool valid_qr=PETSC_TRUE;
1751             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1752             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1753             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1754             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1755             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1756             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1757             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1758             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1759             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1760             for (jj=0;jj<size_of_constraint;jj++) {
1761               for (ii=0;ii<primal_dofs;ii++) {
1762                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1763                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1764               }
1765             }
1766             if (!valid_qr) {
1767               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1768               for (jj=0;jj<size_of_constraint;jj++) {
1769                 for (ii=0;ii<primal_dofs;ii++) {
1770                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1771                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1772                   }
1773                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1774                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1775                   }
1776                 }
1777               }
1778             } else {
1779               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1780             }
1781           }
1782           /* increment primal counter */
1783           primal_counter += primal_dofs;
1784         } else {
1785           if (pcbddc->dbg_flag) {
1786             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1787           }
1788         }
1789         /* increment constraint counter total_counts */
1790         total_counts += primal_dofs;
1791       }
1792       if (pcbddc->dbg_flag) {
1793         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1794         ierr = PetscFree(work);CHKERRQ(ierr);
1795       }
1796       /* free workspace */
1797       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1798       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1799       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1800       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1801       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1802       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1803     }
1804     /* assembling */
1805     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1806     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1807     /*
1808     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1809     */
1810   }
1811   /* free workspace */
1812   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1813   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1814   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1815   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1816   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1817   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1818   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1819   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1825 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1826 {
1827   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1828   PC_IS       *pcis = (PC_IS*)pc->data;
1829   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1830   PetscInt    bs,ierr,i,vertex_size;
1831   PetscViewer viewer=pcbddc->dbg_viewer;
1832 
1833   PetscFunctionBegin;
1834   /* Init local Graph struct */
1835   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1836 
1837   /* Check validity of the csr graph passed in by the user */
1838   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1839     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1840   }
1841   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1842   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1843     Mat mat_adj;
1844     const PetscInt *xadj,*adjncy;
1845     PetscBool flg_row=PETSC_TRUE;
1846 
1847     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1848     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1849     if (!flg_row) {
1850       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1851     }
1852     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1853     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1854     if (!flg_row) {
1855       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1856     }
1857     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1858   }
1859 
1860   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1861   vertex_size = 1;
1862   if (!pcbddc->n_ISForDofs) {
1863     IS *custom_ISForDofs;
1864 
1865     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1866     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1867     for (i=0;i<bs;i++) {
1868       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1869     }
1870     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1871     /* remove my references to IS objects */
1872     for (i=0;i<bs;i++) {
1873       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1874     }
1875     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1876   } else { /* mat block size as vertex size (used for elasticity) */
1877     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1878   }
1879 
1880   /* Setup of Graph */
1881   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1882 
1883   /* Graph's connected components analysis */
1884   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1885 
1886   /* print some info to stdout */
1887   if (pcbddc->dbg_flag) {
1888     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1895 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1896 {
1897   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1898   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1899   PetscErrorCode ierr;
1900 
1901   PetscFunctionBegin;
1902   n = 0;
1903   vertices = 0;
1904   if (pcbddc->ConstraintMatrix) {
1905     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1906     for (i=0;i<local_primal_size;i++) {
1907       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1908       if (size_of_constraint == 1) n++;
1909       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1910     }
1911     if (vertices_idx) {
1912       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1913       n = 0;
1914       for (i=0;i<local_primal_size;i++) {
1915         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1916         if (size_of_constraint == 1) {
1917           vertices[n++]=row_cmat_indices[0];
1918         }
1919         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1920       }
1921     }
1922   }
1923   *n_vertices = n;
1924   if (vertices_idx) *vertices_idx = vertices;
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1930 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1931 {
1932   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1933   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1934   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1935   PetscBT        touched;
1936   PetscErrorCode ierr;
1937 
1938     /* This function assumes that the number of local constraints per connected component
1939        is not greater than the number of nodes defined for the connected component
1940        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1941   PetscFunctionBegin;
1942   n = 0;
1943   constraints_index = 0;
1944   if (pcbddc->ConstraintMatrix) {
1945     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1946     max_size_of_constraint = 0;
1947     for (i=0;i<local_primal_size;i++) {
1948       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1949       if (size_of_constraint > 1) {
1950         n++;
1951       }
1952       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1953       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1954     }
1955     if (constraints_idx) {
1956       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1957       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1958       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1959       n = 0;
1960       for (i=0;i<local_primal_size;i++) {
1961         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1962         if (size_of_constraint > 1) {
1963           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1964           /* find first untouched local node */
1965           j = 0;
1966           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1967           min_index = row_cmat_global_indices[j];
1968           min_loc = j;
1969           /* search the minimum among nodes not yet touched on the connected component
1970              since there can be more than one constraint on a single cc */
1971           for (j=1;j<size_of_constraint;j++) {
1972             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1973               min_index = row_cmat_global_indices[j];
1974               min_loc = j;
1975             }
1976           }
1977           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1978           constraints_index[n++] = row_cmat_indices[min_loc];
1979         }
1980         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1981       }
1982       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1983       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1984     }
1985   }
1986   *n_constraints = n;
1987   if (constraints_idx) *constraints_idx = constraints_index;
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 /* the next two functions has been adapted from pcis.c */
1992 #undef __FUNCT__
1993 #define __FUNCT__ "PCBDDCApplySchur"
1994 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
1995 {
1996   PetscErrorCode ierr;
1997   PC_IS          *pcis = (PC_IS*)(pc->data);
1998 
1999   PetscFunctionBegin;
2000   if (!vec2_B) { vec2_B = v; }
2001   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2002   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2003   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2004   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2005   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2011 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2012 {
2013   PetscErrorCode ierr;
2014   PC_IS          *pcis = (PC_IS*)(pc->data);
2015 
2016   PetscFunctionBegin;
2017   if (!vec2_B) { vec2_B = v; }
2018   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2019   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2020   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2021   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2022   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "PCBDDCSubsetNumbering"
2028 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2029 {
2030   Vec            local_vec,global_vec;
2031   IS             seqis,paris;
2032   VecScatter     scatter_ctx;
2033   PetscScalar    *array;
2034   PetscInt       *temp_global_dofs;
2035   PetscScalar    globalsum;
2036   PetscInt       i,j,s;
2037   PetscInt       nlocals,first_index,old_index,max_local;
2038   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2039   PetscMPIInt    *dof_sizes,*dof_displs;
2040   PetscBool      first_found;
2041   PetscErrorCode ierr;
2042 
2043   PetscFunctionBegin;
2044   /* mpi buffers */
2045   MPI_Comm_size(comm,&size_prec_comm);
2046   MPI_Comm_rank(comm,&rank_prec_comm);
2047   j = ( !rank_prec_comm ? size_prec_comm : 0);
2048   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2049   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2050   /* get maximum size of subset */
2051   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2052   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2053   max_local = 0;
2054   if (n_local_dofs) {
2055     max_local = temp_global_dofs[0];
2056     for (i=1;i<n_local_dofs;i++) {
2057       if (max_local < temp_global_dofs[i] ) {
2058         max_local = temp_global_dofs[i];
2059       }
2060     }
2061   }
2062   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2063   max_global++;
2064   max_local = 0;
2065   if (n_local_dofs) {
2066     max_local = local_dofs[0];
2067     for (i=1;i<n_local_dofs;i++) {
2068       if (max_local < local_dofs[i] ) {
2069         max_local = local_dofs[i];
2070       }
2071     }
2072   }
2073   max_local++;
2074   /* allocate workspace */
2075   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2076   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2077   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2078   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2079   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2080   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2081   /* create scatter */
2082   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2083   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2084   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2085   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2086   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2087   /* init array */
2088   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2089   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2090   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2091   if (local_dofs_mult) {
2092     for (i=0;i<n_local_dofs;i++) {
2093       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2094     }
2095   } else {
2096     for (i=0;i<n_local_dofs;i++) {
2097       array[local_dofs[i]]=1.0;
2098     }
2099   }
2100   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2101   /* scatter into global vec and get total number of global dofs */
2102   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2105   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2106   /* Fill global_vec with cumulative function for global numbering */
2107   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2108   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2109   nlocals = 0;
2110   first_index = -1;
2111   first_found = PETSC_FALSE;
2112   for (i=0;i<s;i++) {
2113     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2114       first_found = PETSC_TRUE;
2115       first_index = i;
2116     }
2117     nlocals += (PetscInt)PetscRealPart(array[i]);
2118   }
2119   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2120   if (!rank_prec_comm) {
2121     dof_displs[0]=0;
2122     for (i=1;i<size_prec_comm;i++) {
2123       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2124     }
2125   }
2126   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2127   if (first_found) {
2128     array[first_index] += (PetscScalar)nlocals;
2129     old_index = first_index;
2130     for (i=first_index+1;i<s;i++) {
2131       if (PetscRealPart(array[i]) > 0.0) {
2132         array[i] += array[old_index];
2133         old_index = i;
2134       }
2135     }
2136   }
2137   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2138   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2139   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2140   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2141   /* get global ordering of local dofs */
2142   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2143   if (local_dofs_mult) {
2144     for (i=0;i<n_local_dofs;i++) {
2145       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2146     }
2147   } else {
2148     for (i=0;i<n_local_dofs;i++) {
2149       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2150     }
2151   }
2152   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2153   /* free workspace */
2154   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2155   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2156   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2157   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2158   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2159   /* return pointer to global ordering of local dofs */
2160   *global_numbering_subset = temp_global_dofs;
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 #undef __FUNCT__
2165 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2166 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2167 {
2168   PetscInt       i,j;
2169   PetscScalar    *alphas;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   /* this implements stabilized Gram-Schmidt */
2174   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2175   for (i=0;i<n;i++) {
2176     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2177     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2178     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2179   }
2180   ierr = PetscFree(alphas);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatConvert_IS_AIJ"
2187 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2188 {
2189   Mat new_mat;
2190   Mat_IS *matis = (Mat_IS*)(mat->data);
2191   /* info on mat */
2192   PetscInt bs,rows,cols;
2193   PetscInt lrows,lcols;
2194   PetscInt local_rows,local_cols;
2195   PetscMPIInt nsubdomains;
2196   /* preallocation */
2197   Vec vec_dnz,vec_onz;
2198   PetscScalar *my_dnz,*my_onz,*array;
2199   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2200   PetscInt  index_row,index_col,owner;
2201   PetscInt  *local_indices,*global_indices;
2202   /* work */
2203   PetscInt i,j;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   /* CHECKS */
2208   /* get info from mat */
2209   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2210   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2211   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2212   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2213 
2214   /* MAT_INITIAL_MATRIX */
2215   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2216   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2217   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2218   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2219   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2220   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2221 
2222   /*
2223     preallocation
2224   */
2225 
2226   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2227   /*
2228      Some vectors are needed to sum up properly on shared interface dofs.
2229      Note that preallocation is not exact, since it overestimates nonzeros
2230   */
2231 /*
2232   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2233   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2234   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2235   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2236 */
2237   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2238   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2239   /* All processes need to compute entire row ownership */
2240   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2241   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2242   for (i=0;i<nsubdomains;i++) {
2243     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2244       row_ownership[j]=i;
2245     }
2246   }
2247   /* map indices of local mat to global values */
2248   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2249   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2250   for (i=0;i<local_rows;i++) local_indices[i]=i;
2251   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2252   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2253 
2254   /*
2255      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2256      then, they will be summed up properly. This way, preallocation is always sufficient
2257   */
2258   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2259   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2260   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2261   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2262   for (i=0;i<local_rows;i++) {
2263     index_row = global_indices[i];
2264     for (j=i;j<local_rows;j++) {
2265       owner = row_ownership[index_row];
2266       index_col = global_indices[j];
2267       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2268         my_dnz[i] += 1.0;
2269       } else { /* offdiag block */
2270         my_onz[i] += 1.0;
2271       }
2272       /* same as before, interchanging rows and cols */
2273       if (i != j) {
2274         owner = row_ownership[index_col];
2275         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2276           my_dnz[j] += 1.0;
2277         } else {
2278           my_onz[j] += 1.0;
2279         }
2280       }
2281     }
2282   }
2283   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2284   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2285   if (local_rows) { /* multilevel guard */
2286     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2287     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2288   }
2289   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2290   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2291   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2292   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2293   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2294   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2295   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2296 
2297   /* set computed preallocation in dnz and onz */
2298   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2299   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2300   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2301   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2302   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2303   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2304   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2305   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2306 
2307   /* Resize preallocation if overestimated */
2308   for (i=0;i<lrows;i++) {
2309     dnz[i] = PetscMin(dnz[i],lcols);
2310     onz[i] = PetscMin(onz[i],cols-lcols);
2311   }
2312   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2313   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2314 
2315   /*
2316     Set values. Very Basic.
2317   */
2318   for (i=0;i<local_rows;i++) {
2319     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2320     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2321     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2322     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2323     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2324   }
2325   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2327   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2328 
2329   /* get back mat */
2330   *M = new_mat;
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "MatISSubassemble_Private"
2336 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2337 {
2338   Mat             subdomain_adj;
2339   IS              new_ranks,ranks_send_to;
2340   MatPartitioning partitioner;
2341   Mat_IS          *matis;
2342   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2343   PetscInt        prank;
2344   PetscMPIInt     size,rank,color;
2345   PetscInt        *xadj,*adjncy,*oldranks;
2346   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2347   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2348   PetscErrorCode  ierr;
2349   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE,use_threshold=PETSC_FALSE;
2350   PetscSubcomm    subcomm;
2351 
2352   PetscFunctionBegin;
2353   ierr = PetscOptionsGetBool(NULL,"-matisc_use_square",&use_square,NULL);CHKERRQ(ierr);
2354   ierr = PetscOptionsGetBool(NULL,"-matisc_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2355   ierr = PetscOptionsGetInt(NULL,"-matisc_threshold",&threshold,&use_threshold);CHKERRQ(ierr);
2356 
2357   /* Get info on mapping */
2358   matis = (Mat_IS*)(mat->data);
2359   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2360   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2361 
2362   /* build local CSR graph of subdomains' connectivity */
2363   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2364   xadj[0] = 0;
2365   xadj[1] = PetscMax(n_neighs-1,0);
2366   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2367   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2368 
2369   if (use_threshold) {
2370     PetscInt* count;
2371     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2372     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2373     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2374       for (j=0;j<n_shared[i];j++) {
2375         count[shared[i][j]] += 1;
2376       }
2377     }
2378     xadj[1] = 0;
2379     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2380       for (j=0;j<n_shared[i];j++) {
2381         if (count[shared[i][j]] < threshold) {
2382           adjncy[xadj[1]] = neighs[i];
2383           adjncy_wgt[xadj[1]] = n_shared[i];
2384           xadj[1]++;
2385           break;
2386         }
2387       }
2388     }
2389     ierr = PetscFree(count);CHKERRQ(ierr);
2390   } else {
2391     if (xadj[1]) {
2392       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2393       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2394     }
2395   }
2396   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2397   if (use_square) {
2398     for (i=0;i<xadj[1];i++) {
2399       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2400     }
2401   }
2402   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2403 
2404   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2405 
2406   /*
2407     Restrict work on active processes only.
2408   */
2409   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2410   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2411   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2412   ierr = PetscMPIIntCast(!n_neighs,&color);CHKERRQ(ierr);
2413   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr);
2414   if (color) {
2415     ierr = PetscFree(xadj);CHKERRQ(ierr);
2416     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2417     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2418   } else {
2419     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2420     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2421     prank = rank;
2422     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2423     for (i=0;i<size;i++) {
2424       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2425     }
2426     for (i=0;i<xadj[1];i++) {
2427       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2428     }
2429     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2430     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2431     n_subdomains = (PetscInt)size;
2432     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2433 
2434     /* Partition */
2435     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2436     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2437     if (use_vwgt) {
2438       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2439       v_wgt[0] = local_size;
2440       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2441     }
2442     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2443     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2444     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2445     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2446     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2447     ierr = ISView(new_ranks,0);CHKERRQ(ierr);
2448 
2449     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2450     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2451     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2452     /* clean up */
2453     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2454     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2455     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2456     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2457   }
2458   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2459 
2460   /* assemble parallel IS for sends */
2461   i = 1;
2462   if (color) i=0;
2463   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2464   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2465 
2466   /* get back IS */
2467   *is_sends = ranks_send_to;
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatISSubassemble"
2475 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2476 {
2477   Mat                    local_mat,new_mat;
2478   Mat_IS                 *matis;
2479   IS                     is_sends_internal;
2480   PetscInt               rows,cols;
2481   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2482   PetscBool              ismatis,isdense;
2483   ISLocalToGlobalMapping l2gmap;
2484   PetscInt*              l2gmap_indices;
2485   const PetscInt*        is_indices;
2486   MatType                new_local_type;
2487   MatTypePrivate         new_local_type_private;
2488   /* buffers */
2489   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2490   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2491   /* MPI */
2492   MPI_Comm               comm;
2493   PetscMPIInt            n_sends,n_recvs,commsize;
2494   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2495   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2496   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2497   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2498   PetscErrorCode         ierr;
2499 
2500   PetscFunctionBegin;
2501   /* checks */
2502   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2503   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2504   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2505   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2506   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2507   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2508   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2509   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2510   PetscValidLogicalCollectiveInt(mat,bs,0);
2511   /* prepare IS for sending if not provided */
2512   if (!is_sends) {
2513     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2514     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2515   } else {
2516     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2517     is_sends_internal = is_sends;
2518   }
2519 
2520   /* get pointer of MATIS data */
2521   matis = (Mat_IS*)mat->data;
2522 
2523   /* get comm */
2524   comm = PetscObjectComm((PetscObject)mat);
2525 
2526   /* compute number of sends */
2527   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2528   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2529 
2530   /* compute number of receives */
2531   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2532   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2533   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2534   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2535   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2536   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2537   ierr = PetscFree(iflags);CHKERRQ(ierr);
2538 
2539   /* prepare send/receive buffers */
2540   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2541   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2542   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2543   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2544 
2545   /* Get data from local mat */
2546   if (!isdense) {
2547     /* TODO: See below some guidelines on how to prepare the local buffers */
2548     /*
2549        send_buffer_vals should contain the raw values of the local matrix
2550        send_buffer_idxs should contain:
2551        - MatType_PRIVATE type
2552        - PetscInt        size_of_l2gmap
2553        - PetscInt        global_row_indices[size_of_l2gmap]
2554        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2555     */
2556   } else {
2557     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2558     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2559     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2560     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2561     send_buffer_idxs[1] = i;
2562     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2563     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2564     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2565     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2566     for (i=0;i<n_sends;i++) {
2567       ilengths_vals[is_indices[i]] = len*len;
2568       ilengths_idxs[is_indices[i]] = len+2;
2569     }
2570   }
2571   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2572   buf_size_idxs = 0;
2573   buf_size_vals = 0;
2574   for (i=0;i<n_recvs;i++) {
2575     buf_size_idxs += (PetscInt)olengths_idxs[i];
2576     buf_size_vals += (PetscInt)olengths_vals[i];
2577   }
2578   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2579   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2580 
2581   /* get new tags for clean communications */
2582   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2583   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2584 
2585   /* allocate for requests */
2586   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2587   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2588   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2589   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2590 
2591   /* communications */
2592   ptr_idxs = recv_buffer_idxs;
2593   ptr_vals = recv_buffer_vals;
2594   for (i=0;i<n_recvs;i++) {
2595     source_dest = onodes[i];
2596     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2597     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2598     ptr_idxs += olengths_idxs[i];
2599     ptr_vals += olengths_vals[i];
2600   }
2601   for (i=0;i<n_sends;i++) {
2602     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2603     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2604     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2605   }
2606   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2607   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2608 
2609   /* assemble new l2g map */
2610   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2611   ptr_idxs = recv_buffer_idxs;
2612   buf_size_idxs = 0;
2613   for (i=0;i<n_recvs;i++) {
2614     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2615     ptr_idxs += olengths_idxs[i];
2616   }
2617   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2618   ptr_idxs = recv_buffer_idxs;
2619   buf_size_idxs = 0;
2620   for (i=0;i<n_recvs;i++) {
2621     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2622     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2623     ptr_idxs += olengths_idxs[i];
2624   }
2625   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2626   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2627   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2628 
2629   /* infer new local matrix type from received local matrices type */
2630   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2631   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2632   new_local_type_private = MATAIJ_PRIVATE;
2633   new_local_type = MATSEQAIJ;
2634   if (n_recvs) {
2635     new_local_type_private = send_buffer_idxs[0];
2636     ptr_idxs = recv_buffer_idxs;
2637     for (i=0;i<n_recvs;i++) {
2638       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2639         new_local_type_private = MATAIJ_PRIVATE;
2640         break;
2641       }
2642       ptr_idxs += olengths_idxs[i];
2643     }
2644     switch (new_local_type_private) {
2645       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2646         new_local_type = MATSEQAIJ;
2647         bs = 1;
2648         break;
2649       case MATAIJ_PRIVATE:
2650         new_local_type = MATSEQAIJ;
2651         bs = 1;
2652         break;
2653       case MATBAIJ_PRIVATE:
2654         new_local_type = MATSEQBAIJ;
2655         break;
2656       case MATSBAIJ_PRIVATE:
2657         new_local_type = MATSEQSBAIJ;
2658         break;
2659       default:
2660         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2661         break;
2662     }
2663   }
2664 
2665   /* create MATIS object */
2666   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2667   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2668   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2669   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2670   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2671   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2672 
2673   /* set values */
2674   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2675   ptr_vals = recv_buffer_vals;
2676   ptr_idxs = recv_buffer_idxs;
2677   for (i=0;i<n_recvs;i++) {
2678     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2679       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2680       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2681       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2682       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2683       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2684     }
2685     ptr_idxs += olengths_idxs[i];
2686     ptr_vals += olengths_vals[i];
2687   }
2688   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2689   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2690   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2691   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2692 
2693   { /* check */
2694     Vec       lvec,rvec;
2695     PetscReal infty_error;
2696 
2697     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2698     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2699     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2700     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2701     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2702     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2703     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2704     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2705     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2706   }
2707 
2708   /* free workspace */
2709   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2710   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2711   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2712   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2713   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2714   if (isdense) {
2715     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2716     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2717   } else {
2718     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2719   }
2720   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2721   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2722   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2723   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2724   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2725   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2726   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2727   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2728   ierr = PetscFree(onodes);CHKERRQ(ierr);
2729   /* get back new mat */
2730   *mat_n = new_mat;
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2736 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2737 {
2738   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2739   PC_IS                  *pcis = (PC_IS*)pc->data;
2740   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2741   MatNullSpace           CoarseNullSpace=NULL;
2742   ISLocalToGlobalMapping coarse_islg;
2743   IS                     coarse_is;
2744   PetscInt               coarse_size,im_active,active_procs,*local_primal_indices=NULL;
2745   PCType                 coarse_pc_type;
2746   KSPType                coarse_ksp_type;
2747   PetscBool              setsym,issym=PETSC_FALSE;
2748   PetscErrorCode         ierr;
2749 
2750   PetscFunctionBegin;
2751   /* TODO: remove this and coarse ennums */
2752   if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) {
2753     pcbddc->coarse_problem_type = PARALLEL_BDDC;
2754   }
2755 
2756   /* Assign global numbering to coarse dofs */
2757   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2758 
2759   /* count "active processes" */
2760   im_active = 0;
2761   if (pcis->n) im_active = 1;
2762   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2763 
2764   /* adapt coarse problem type if needed */
2765   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2766     if (pcbddc->current_level < pcbddc->max_levels) {
2767       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2768         if (pcbddc->dbg_flag) {
2769           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active %d,ratio %d). Using direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2770          ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2771         }
2772         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2773       }
2774     } else {
2775       if (pcbddc->dbg_flag) {
2776         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2777         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2778       }
2779       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2780     }
2781   }
2782 
2783   /* creates MATIS object for coarse matrix */
2784   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2785   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2786   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2787   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2788   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2789   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791 
2792   /* Convert or subassemble MATIS depending on chosen coarse solver approach */
2793   if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2794     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2795     /* just for now */
2796     coarse_pc_type  = PCREDUNDANT;
2797     coarse_ksp_type = KSPPREONLY;
2798   } else {
2799     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2800     /* just for now */
2801     coarse_pc_type  = PCBDDC;
2802     coarse_ksp_type = KSPRICHARDSON;
2803   }
2804 
2805   /* propagate symmetry info to coarse matrix */
2806   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2807   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2808 
2809   /* create local to global scatters coarse problem */
2810   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2811   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2812 
2813   /* free memory no longer needed */
2814   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2815   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2816   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2817   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2818   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2819 
2820   /* Compute coarse null space */
2821   if (pcbddc->NullSpace) {
2822     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2823   }
2824 
2825   /* KSP for coarse problem */
2826   {
2827     PC        pc_temp;
2828     PetscBool isbddc=PETSC_FALSE;
2829     PetscInt  max_it;
2830 
2831     if (!pcbddc->coarse_ksp) { /* create the coarse KSP once */
2832       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2833       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2834       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2835       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2836       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2837       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2838       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2839     }
2840     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2841     /* Allow user's customization */
2842     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
2843     /* Set Up PC for coarse problem BDDC */
2844     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2845       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2846       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2847       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2848       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
2849       if (CoarseNullSpace) {
2850         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2851       }
2852       if (pcbddc->dbg_flag) {
2853         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"----------------Level %d -> Setting up next level---------------\n",pcbddc->current_level);CHKERRQ(ierr);
2854         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2855       }
2856     } else {
2857       if (CoarseNullSpace) {
2858         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2859       }
2860     }
2861     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2862     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2863 
2864     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2865     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2866     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2867     if (max_it == 1) {
2868       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2869       if (isbddc) {
2870         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
2871       }
2872     }
2873   }
2874 
2875   /* Check coarse problem if requested */
2876   if (pcbddc->dbg_flag) {
2877     KSP check_ksp;
2878     PC  check_pc;
2879     Vec check_vec;
2880     PetscInt its;
2881     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
2882     KSPType check_ksp_type;
2883 
2884     /* Create ksp object suitable for extreme eigenvalues' estimation */
2885     ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&check_ksp);CHKERRQ(ierr);
2886     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2887     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
2888     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2889       if (issym) check_ksp_type = KSPCG;
2890       else check_ksp_type = KSPGMRES;
2891       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
2892     } else {
2893       check_ksp_type = KSPPREONLY;
2894     }
2895     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
2896     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
2897     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
2898     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
2899     /* create random vec */
2900     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
2901     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
2902     if (CoarseNullSpace) {
2903       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
2904     }
2905     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2906     /* solve coarse problem */
2907     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
2908     if (CoarseNullSpace) {
2909       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
2910     }
2911     /* check coarse problem residual error */
2912     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
2913     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2914     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2915     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
2916     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
2917     /* get eigenvalue estimation if inexact */
2918     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2919       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
2920       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
2921       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",its,check_ksp_type);CHKERRQ(ierr);
2922       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
2923     }
2924     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
2925     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
2926     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
2927   }
2928   if (pcbddc->dbg_flag) {
2929     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2930   }
2931   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
2932   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 #undef __FUNCT__
2937 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
2938 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
2939 {
2940   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2941   PC_IS*         pcis = (PC_IS*)pc->data;
2942   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
2943   PetscInt       i,j,coarse_size;
2944   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
2945   PetscErrorCode ierr;
2946 
2947   PetscFunctionBegin;
2948   /* get indices in local ordering for vertices and constraints */
2949   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2950   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2951   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2952   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2953   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2954   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2955   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2956 
2957   /* Compute global number of coarse dofs */
2958   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2959 
2960   /* check numbering */
2961   if (pcbddc->dbg_flag) {
2962     PetscScalar coarsesum,*array;
2963 
2964     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2966     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
2967     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2968     for (i=0;i<pcbddc->local_primal_size;i++) {
2969       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2970     }
2971     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
2972     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
2973     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2974     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2975     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2976     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2977     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2978     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2979     for (i=0;i<pcis->n;i++) {
2980       if (array[i] == 1.0) {
2981         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
2982       }
2983     }
2984     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2985     for (i=0;i<pcis->n;i++) {
2986       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
2987     }
2988     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2989     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2990     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2991     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2992     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
2993     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
2994     if (pcbddc->dbg_flag > 1) {
2995       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
2996       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2997       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2998       for (i=0;i<pcbddc->local_primal_size;i++) {
2999         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3000       }
3001       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3002     }
3003     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3004   }
3005   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3006   /* get back data */
3007   *coarse_size_n = coarse_size;
3008   *local_primal_indices_n = local_primal_indices;
3009   PetscFunctionReturn(0);
3010 }
3011 
3012