xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision f347579b2c740aaf2706e3a53ee08f00957c2ec2)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
111   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118 {
119   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
120   PC_IS          *pcis = (PC_IS*)pc->data;
121   VecType        impVecType;
122   PetscInt       n_constraints,n_R,old_size;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   if (!pcbddc->ConstraintMatrix) {
127     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128   }
129   /* get sizes */
130   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
131   n_R = pcis->n-pcbddc->n_actual_vertices;
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   /* local work vectors (try to avoid unneeded work)*/
134   /* R nodes */
135   old_size = -1;
136   if (pcbddc->vec1_R) {
137     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
138   }
139   if (n_R != old_size) {
140     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
146   }
147   /* local primal dofs */
148   old_size = -1;
149   if (pcbddc->vec1_P) {
150     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
151   }
152   if (pcbddc->local_primal_size != old_size) {
153     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
156     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
157   }
158   /* local explicit constraints */
159   old_size = -1;
160   if (pcbddc->vec1_C) {
161     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
162   }
163   if (n_constraints && n_constraints != old_size) {
164     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
167     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCSetUpCorrection"
174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175 {
176   PetscErrorCode         ierr;
177   /* pointers to pcis and pcbddc */
178   PC_IS*                 pcis = (PC_IS*)pc->data;
179   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180   /* submatrices of local problem */
181   Mat                    A_RV,A_VR,A_VV;
182   /* working matrices */
183   Mat                    M1,M2,M3,C_CR;
184   /* working vectors */
185   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186   /* additional working stuff */
187   IS                     is_aux;
188   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
189   const PetscScalar      *array,*row_cmat_values;
190   const PetscInt         *row_cmat_indices,*idx_R_local;
191   PetscInt               *idx_V_B,*auxindices;
192   PetscInt               n_vertices,n_constraints,size_of_constraint;
193   PetscInt               i,j,n_R,n_D,n_B;
194   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check;
195   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
196   MatType                impMatType;
197   /* some shortcuts to scalars */
198   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
199   /* for debugging purposes */
200   PetscReal              *coarsefunctions_errors,*constraints_errors;
201 
202   PetscFunctionBegin;
203   /* get number of vertices (corners plus constraints with change of basis)
204      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
205   n_vertices = pcbddc->n_actual_vertices;
206   n_constraints = pcbddc->local_primal_size-n_vertices;
207   /* Set Non-overlapping dimensions */
208   n_B = pcis->n_B; n_D = pcis->n - n_B;
209   n_R = pcis->n-n_vertices;
210 
211   /* Set types for local objects needed by BDDC precondtioner */
212   impMatType = MATSEQDENSE;
213 
214   /* Allocating some extra storage just to be safe */
215   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
216   for (i=0;i<pcis->n;i++) auxindices[i]=i;
217 
218   /* vertices in boundary numbering */
219   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
220   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
221   if (i != n_vertices) {
222     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
223   }
224 
225   /* Precompute stuffs needed for preprocessing and application of BDDC*/
226   if (n_constraints) {
227     /* see if we can save some allocations */
228     if (pcbddc->local_auxmat2) {
229       PetscInt on_R,on_constraints;
230       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
231       if (on_R != n_R || on_constraints != n_constraints) {
232         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
233         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
234       }
235     }
236     /* work vectors */
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
239     /* auxiliary matrices */
240     if (!pcbddc->local_auxmat2) {
241       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
242       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
243       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
244       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
245     }
246 
247     /* Extract constraints on R nodes: C_{CR}  */
248     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
249     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
250     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
251 
252     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
253     for (i=0;i<n_constraints;i++) {
254       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
255       /* Get row of constraint matrix in R numbering */
256       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
257       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
258       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
260       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
261       /* Solve for row of constraint matrix in R numbering */
262       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
263       /* Set values in local_auxmat2 */
264       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
265       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
266       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267     }
268     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
271 
272     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
273     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
274     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
275     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
276     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
277     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(M1);CHKERRQ(ierr);
279     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
280     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
281     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
282     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
283     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
284     ierr = MatDestroy(&M2);CHKERRQ(ierr);
285     ierr = MatDestroy(&M3);CHKERRQ(ierr);
286     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
287     if (!pcbddc->local_auxmat1) {
288       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
289     } else {
290       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     }
292   }
293 
294   /* Get submatrices from subdomain matrix */
295   if (n_vertices) {
296     PetscInt ibs,mbs;
297     PetscBool issbaij;
298     Mat newmat;
299 
300     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
301     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
302     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
303     if (ibs != mbs) { /* need to convert to SEQAIJ */
304       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
308       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
309     } else {
310       /* this is safe */
311       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
313       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
314         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
315         /* which of the two approaches is faster? */
316         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
317         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
318         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
320         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
321       } else {
322         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
323         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324       }
325     }
326     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
327     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
328     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
329   }
330 
331   /* Matrix of coarse basis functions (local) */
332   if (pcbddc->coarse_phi_B) {
333     PetscInt on_B,on_primal;
334     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
335     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
336       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
337       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
338     }
339   }
340   if (pcbddc->coarse_phi_D) {
341     PetscInt on_D,on_primal;
342     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
343     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
344       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
345       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
346     }
347   }
348   if (!pcbddc->coarse_phi_B) {
349     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
350     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
351     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
352     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
353   }
354   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
355     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
356     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
357     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
358     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
359   }
360 
361   if (pcbddc->dbg_flag) {
362     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
363     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
364     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
365   }
366   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
367   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
368 
369   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
370 
371   /* vertices */
372   for (i=0;i<n_vertices;i++) {
373     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
374     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
375     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
376     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
377     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
378     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
379     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
380     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
383     if (n_constraints) {
384       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
385       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
387     }
388     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
389     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
390 
391     /* Set values in coarse basis function and subdomain part of coarse_mat */
392     /* coarse basis functions */
393     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
394     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
397     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
398     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
400     if (pcbddc->switch_static || pcbddc->dbg_flag) {
401       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
404       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
405       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406     }
407     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
408     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
409     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
410     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     if (n_constraints) {
412       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
413       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
414       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     }
416 
417     /* check */
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on local nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
425       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
426       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
427       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
428       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
429       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       if (n_constraints) {
433         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       }
437       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
440       /* check saddle point solution */
441       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
444       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
445       /* shift by the identity matrix */
446       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
447       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
450     }
451   }
452 
453   /* constraints */
454   for (i=0;i<n_constraints;i++) {
455     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
456     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
457     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
458     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
459     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
460     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
461     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
462     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
463     if (n_vertices) {
464       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
465     }
466     /* Set values in coarse basis function and subdomain part of coarse_mat */
467     /* coarse basis functions */
468     j = i+n_vertices; /* don't touch this! */
469     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
470     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
473     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
474     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
475     if (pcbddc->switch_static || pcbddc->dbg_flag) {
476       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
479       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
481     }
482     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
483     if (n_vertices) {
484       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
485       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
487     }
488     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
489     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
490     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
491 
492     if (pcbddc->dbg_flag) {
493       /* assemble subdomain vector on nodes */
494       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
495       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
496       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
497       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
498       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
499       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
500       /* assemble subdomain vector of lagrange multipliers */
501       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
502       if (n_vertices) {
503         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
504         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
505         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
506       }
507       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
508       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
509       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
510       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
513       /* check saddle point solution */
514       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
517       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
518       /* shift by the identity matrix */
519       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
520       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
523     }
524   }
525   /* call assembling routines for local coarse basis */
526   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   if (pcbddc->switch_static || pcbddc->dbg_flag) {
529     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   }
532 
533   /* compute other basis functions for non-symmetric problems */
534   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
535   if (!setsym || (setsym && !issym)) {
536     if (!pcbddc->coarse_psi_B) {
537       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
538       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
539       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
540       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
541     }
542     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
543       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
544       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
545       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
546       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
547     }
548     for (i=0;i<pcbddc->local_primal_size;i++) {
549       if (n_constraints) {
550         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
551         for (j=0;j<n_constraints;j++) {
552           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
553         }
554         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
555         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
556       }
557       if (i<n_vertices) {
558         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
559         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
560         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
561         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
562         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
563         if (n_constraints) {
564           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
565         }
566       } else {
567         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
568       }
569       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
570       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
571       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
574       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
575       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
576       if (i<n_vertices) {
577         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
578       }
579       if (pcbddc->switch_static || pcbddc->dbg_flag) {
580         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
583         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
584         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
585       }
586 
587       if (pcbddc->dbg_flag) {
588         /* assemble subdomain vector on nodes */
589         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
590         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
591         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
592         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
593         if (i<n_vertices) {
594           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
595         }
596         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
597         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
598         /* assemble subdomain vector of lagrange multipliers */
599         for (j=0;j<pcbddc->local_primal_size;j++) {
600           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
601         }
602         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
603         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
604         /* check saddle point solution */
605         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
608         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
609         /* shift by the identity matrix */
610         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
611         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
614       }
615     }
616     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     if (pcbddc->switch_static || pcbddc->dbg_flag) {
619       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     }
622     unsymmetric_check = PETSC_TRUE;
623   } else { /* take references to already computed coarse basis */
624     unsymmetric_check = PETSC_FALSE;
625     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
626     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
627     if (pcbddc->coarse_phi_D) {
628       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
629       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
630     }
631   }
632   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
633   /* Checking coarse_sub_mat and coarse basis functios */
634   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
635   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636   if (pcbddc->dbg_flag) {
637     Mat         coarse_sub_mat;
638     Mat         AUXMAT,TM1,TM2,TM3,TM4;
639     Mat         coarse_phi_D,coarse_phi_B;
640     Mat         coarse_psi_D,coarse_psi_B;
641     Mat         A_II,A_BB,A_IB,A_BI;
642     MatType     checkmattype=MATSEQAIJ;
643     PetscReal   real_value;
644 
645     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
646     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
647     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
648     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
649     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
650     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
651     if (unsymmetric_check) {
652       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
653       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
654     }
655     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
656 
657     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
658     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
659     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
660     if (unsymmetric_check) {
661       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
662       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
663       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
664       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
665       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
666       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
667       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
668       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
669       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
670       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
671       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
672       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
673     } else {
674       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
675       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
676       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
677       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
678       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
679       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
680       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
681       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
682     }
683     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
684     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
685     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
686     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
687     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
688     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
689     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
690     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
691     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
692     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
693     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
694     for (i=0;i<pcbddc->local_primal_size;i++) {
695       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
696     }
697     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
698     for (i=0;i<pcbddc->local_primal_size;i++) {
699       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
700     }
701     if (unsymmetric_check) {
702       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
703       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
704         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
705       }
706       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
707       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
708         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
709       }
710     }
711     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
712     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
713     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
714     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
715     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
716     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
717     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
718     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
719     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
720     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
721     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
722     if (unsymmetric_check) {
723       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
724       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
725     }
726     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
727     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
728     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
729     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
730   }
731   /* free memory */
732   if (n_vertices) {
733     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
734     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
735     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
737     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
738   }
739   if (n_constraints) {
740     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
741     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
742     ierr = MatDestroy(&M1);CHKERRQ(ierr);
743     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
744   }
745   ierr = PetscFree(auxindices);CHKERRQ(ierr);
746   /* get back data */
747   *coarse_submat_vals_n = coarse_submat_vals;
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
753 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
754 {
755   PC_IS*            pcis = (PC_IS*)(pc->data);
756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
758   PetscBool         issbaij,isseqaij;
759   /* manage repeated solves */
760   MatReuse          reuse;
761   MatStructure      matstruct;
762   PetscErrorCode    ierr;
763 
764   PetscFunctionBegin;
765   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
766     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
767   }
768   /* get mat flags */
769   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
770   reuse = MAT_INITIAL_MATRIX;
771   if (pc->setupcalled) {
772     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
773     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
774     if (matstruct == SAME_NONZERO_PATTERN) {
775       reuse = MAT_REUSE_MATRIX;
776     } else {
777       reuse = MAT_INITIAL_MATRIX;
778     }
779   }
780   if (reuse == MAT_INITIAL_MATRIX) {
781     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
782     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
783     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
784     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
785     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
786   }
787 
788   /* transform local matrices if needed */
789   if (pcbddc->use_change_of_basis) {
790     Mat         change_mat_all;
791     PetscScalar *row_cmat_values;
792     PetscInt    *row_cmat_indices;
793     PetscInt    *nnz,*is_indices,*temp_indices;
794     PetscInt    i,j,k,n_D,n_B;
795 
796     /* Get Non-overlapping dimensions */
797     n_B = pcis->n_B;
798     n_D = pcis->n-n_B;
799 
800     /* compute nonzero structure of change of basis on all local nodes */
801     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
802     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
803     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
804     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
805     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
806     k=1;
807     for (i=0;i<n_B;i++) {
808       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
809       nnz[is_indices[i]]=j;
810       if (k < j) k = j;
811       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
812     }
813     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
814     /* assemble change of basis matrix on the whole set of local dofs */
815     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
816     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
817     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
818     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
819     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
820     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
821     for (i=0;i<n_D;i++) {
822       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
823     }
824     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
825     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
826     for (i=0;i<n_B;i++) {
827       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
828       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
829       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
830       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
831     }
832     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
833     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
834     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
835     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
836     if (isseqaij) {
837       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
838     } else {
839       Mat work_mat;
840       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
841       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
842       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
843     }
844     /*
845     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
846     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
847     */
848     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
849     ierr = PetscFree(nnz);CHKERRQ(ierr);
850     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
851   } else {
852     /* without change of basis, the local matrix is unchanged */
853     if (!pcbddc->local_mat) {
854       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
855       pcbddc->local_mat = matis->A;
856     }
857   }
858 
859   /* get submatrices */
860   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
861   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
862   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
863   if (!issbaij) {
864     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
865     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
866   } else {
867     Mat newmat;
868     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
869     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
870     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
871     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
872   }
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
878 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
879 {
880   PC_IS*         pcis = (PC_IS*)(pc->data);
881   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
882   IS             is_aux1,is_aux2;
883   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
884   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
885   PetscInt       vbs,bs;
886   PetscBT        bitmask;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   /*
891     No need to setup local scatters if
892       - primal space is unchanged
893         AND
894       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
895         AND
896       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
897   */
898   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
899     PetscFunctionReturn(0);
900   }
901   /* destroy old objects */
902   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
903   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
904   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
905   /* Set Non-overlapping dimensions */
906   n_B = pcis->n_B; n_D = pcis->n - n_B;
907   n_vertices = pcbddc->n_actual_vertices;
908   /* create auxiliary bitmask */
909   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
910   for (i=0;i<n_vertices;i++) {
911     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
912   }
913 
914   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
915   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
916   for (i=0, n_R=0; i<pcis->n; i++) {
917     if (!PetscBTLookup(bitmask,i)) {
918       idx_R_local[n_R] = i;
919       n_R++;
920     }
921   }
922 
923   /* Block code */
924   vbs = 1;
925   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
926   if (bs>1 && !(n_vertices%bs)) {
927     PetscBool is_blocked = PETSC_TRUE;
928     PetscInt  *vary;
929     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
930     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
931     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
932     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
933     for (i=0; i<n_vertices; i++) {
934       if (vary[i]!=0 && vary[i]!=bs) {
935         is_blocked = PETSC_FALSE;
936         break;
937       }
938     }
939     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
940       vbs = bs;
941       for (i=0;i<n_R/vbs;i++) {
942         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
943       }
944     }
945     ierr = PetscFree(vary);CHKERRQ(ierr);
946   }
947   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
948   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
949 
950   /* print some info if requested */
951   if (pcbddc->dbg_flag) {
952     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
953     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
954     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
955     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
956     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
957     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
958     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
959     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
960   }
961 
962   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
963   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
964   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
965   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
966   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
967   for (i=0; i<n_D; i++) {
968     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
969   }
970   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
971   for (i=0, j=0; i<n_R; i++) {
972     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
973       aux_array1[j++] = i;
974     }
975   }
976   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
977   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
978   for (i=0, j=0; i<n_B; i++) {
979     if (!PetscBTLookup(bitmask,is_indices[i])) {
980       aux_array2[j++] = i;
981     }
982   }
983   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
984   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
985   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
986   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
987   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
988 
989   if (pcbddc->switch_static || pcbddc->dbg_flag) {
990     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
991     for (i=0, j=0; i<n_R; i++) {
992       if (PetscBTLookup(bitmask,idx_R_local[i])) {
993         aux_array1[j++] = i;
994       }
995     }
996     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
997     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
998     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
999   }
1000   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1001   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1008 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1009 {
1010   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1011   PC_IS          *pcis = (PC_IS*)pc->data;
1012   PC             pc_temp;
1013   Mat            A_RR;
1014   MatStructure   matstruct;
1015   MatReuse       reuse;
1016   PetscScalar    m_one = -1.0;
1017   PetscReal      value;
1018   PetscInt       n_D,n_R,ibs,mbs;
1019   PetscBool      use_exact,use_exact_reduced,issbaij;
1020   PetscErrorCode ierr;
1021   /* prefixes stuff */
1022   char           dir_prefix[256],neu_prefix[256],str_level[16];
1023   size_t         len;
1024 
1025   PetscFunctionBegin;
1026   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1027 
1028   /* compute prefixes */
1029   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1030   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1031   if (!pcbddc->current_level) {
1032     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1033     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1034     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1035     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1036   } else {
1037     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1038     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1039     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1040     len -= 15; /* remove "pc_bddc_coarse_" */
1041     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1042     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1043     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1044     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1045     *(dir_prefix+len)='\0';
1046     *(neu_prefix+len)='\0';
1047     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1048     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1049     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1050     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1051   }
1052 
1053   /* DIRICHLET PROBLEM */
1054   /* Matrix for Dirichlet problem is pcis->A_II */
1055   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1056   if (!pcbddc->ksp_D) { /* create object if not yet build */
1057     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1058     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1059     /* default */
1060     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1061     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1062     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1063     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1064     if (issbaij) {
1065       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1066     } else {
1067       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1068     }
1069     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1070     /* Allow user's customization */
1071     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1072   }
1073   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1074   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1075   if (!n_D) {
1076     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1077     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1078   }
1079   /* Set Up KSP for Dirichlet problem of BDDC */
1080   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1081   /* set ksp_D into pcis data */
1082   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1083   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1084   pcis->ksp_D = pcbddc->ksp_D;
1085 
1086   /* NEUMANN PROBLEM */
1087   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1088   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1089   if (pcbddc->ksp_R) { /* already created ksp */
1090     PetscInt nn_R;
1091     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1092     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1093     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1094     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1095       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1096       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1097       reuse = MAT_INITIAL_MATRIX;
1098     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1099       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1100         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1101         reuse = MAT_INITIAL_MATRIX;
1102       } else { /* safe to reuse the matrix */
1103         reuse = MAT_REUSE_MATRIX;
1104       }
1105     }
1106     /* last check */
1107     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1108       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1109       reuse = MAT_INITIAL_MATRIX;
1110     }
1111   } else { /* first time, so we need to create the matrix */
1112     reuse = MAT_INITIAL_MATRIX;
1113   }
1114   /* extract A_RR */
1115   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1116   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1117   if (ibs != mbs) {
1118     Mat newmat;
1119     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1120     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1121     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1122   } else {
1123     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1124   }
1125   if (!pcbddc->ksp_R) { /* create object if not present */
1126     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1127     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1128     /* default */
1129     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1130     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1131     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1132     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1133     if (issbaij) {
1134       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1135     } else {
1136       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1137     }
1138     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1139     /* Allow user's customization */
1140     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1141   }
1142   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1143   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1144   if (!n_R) {
1145     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1146     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1147   }
1148   /* Set Up KSP for Neumann problem of BDDC */
1149   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1150 
1151   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1152   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1153     /* Dirichlet */
1154     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1155     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1156     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1157     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1158     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1159     /* need to be adapted? */
1160     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1161     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1162     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1163     /* print info */
1164     if (pcbddc->dbg_flag) {
1165       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1166       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1167       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1168       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1169       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1170       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1171     }
1172     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1173       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1174     }
1175 
1176     /* Neumann */
1177     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1178     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1179     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1180     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1181     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1182     /* need to be adapted? */
1183     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1184     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1185     /* print info */
1186     if (pcbddc->dbg_flag) {
1187       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1188       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1189     }
1190     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1191       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1192     }
1193   }
1194   /* free Neumann problem's matrix */
1195   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1201 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1202 {
1203   PetscErrorCode ierr;
1204   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1205 
1206   PetscFunctionBegin;
1207   if (applytranspose) {
1208     if (pcbddc->local_auxmat1) {
1209       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1210       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1211     }
1212     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1213   } else {
1214     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1215     if (pcbddc->local_auxmat1) {
1216       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1217       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1218     }
1219   }
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1224 #undef __FUNCT__
1225 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1226 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1227 {
1228   PetscErrorCode ierr;
1229   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1230   PC_IS*            pcis = (PC_IS*)  (pc->data);
1231   const PetscScalar zero = 0.0;
1232 
1233   PetscFunctionBegin;
1234   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1235   if (applytranspose) {
1236     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1237     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1238   } else {
1239     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1240     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1241   }
1242   /* start communications from local primal nodes to rhs of coarse solver */
1243   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1244   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1248   /* TODO remove null space when doing multilevel */
1249   if (pcbddc->coarse_ksp) {
1250     if (applytranspose) {
1251       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1252     } else {
1253       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1254     }
1255   }
1256   /* start communications from coarse solver solution to local primal nodes */
1257   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258 
1259   /* Local solution on R nodes */
1260   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1261   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1262   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1263   if (pcbddc->switch_static) {
1264     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1265     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1266   }
1267   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1268   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1269   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1271   if (pcbddc->switch_static) {
1272     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1273     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1274   }
1275 
1276   /* complete communications from coarse sol to local primal nodes */
1277   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1278 
1279   /* Sum contributions from two levels */
1280   if (applytranspose) {
1281     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1282     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1283   } else {
1284     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1285     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1286   }
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1291 #undef __FUNCT__
1292 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1293 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1294 {
1295   PetscErrorCode ierr;
1296   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1297   PetscScalar    *array,*array2;
1298   Vec            from,to;
1299 
1300   PetscFunctionBegin;
1301   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1302     from = pcbddc->coarse_vec;
1303     to = pcbddc->vec1_P;
1304     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1305       Vec tvec;
1306       PetscInt lsize;
1307       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1308       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1309       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1310       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1311       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1312       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1313       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1314     }
1315   } else { /* from local to global -> put data in coarse right hand side */
1316     from = pcbddc->vec1_P;
1317     to = pcbddc->coarse_vec;
1318   }
1319   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1325 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1326 {
1327   PetscErrorCode ierr;
1328   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1329   PetscScalar    *array,*array2;
1330   Vec            from,to;
1331 
1332   PetscFunctionBegin;
1333   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1334     from = pcbddc->coarse_vec;
1335     to = pcbddc->vec1_P;
1336   } else { /* from local to global -> put data in coarse right hand side */
1337     from = pcbddc->vec1_P;
1338     to = pcbddc->coarse_vec;
1339   }
1340   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1341   if (smode == SCATTER_FORWARD) {
1342     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1343       Vec tvec;
1344       PetscInt lsize;
1345       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1346       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1347       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1348       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1349       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1350       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1351       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1352     }
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 /* uncomment for testing purposes */
1358 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1359 #undef __FUNCT__
1360 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1361 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1362 {
1363   PetscErrorCode    ierr;
1364   PC_IS*            pcis = (PC_IS*)(pc->data);
1365   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1366   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1367   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1368   MatType           impMatType=MATSEQAIJ;
1369   /* one and zero */
1370   PetscScalar       one=1.0,zero=0.0;
1371   /* space to store constraints and their local indices */
1372   PetscScalar       *temp_quadrature_constraint;
1373   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1374   /* iterators */
1375   PetscInt          i,j,k,total_counts,temp_start_ptr;
1376   /* stuff to store connected components stored in pcbddc->mat_graph */
1377   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1378   PetscInt          n_ISForFaces,n_ISForEdges;
1379   /* near null space stuff */
1380   MatNullSpace      nearnullsp;
1381   const Vec         *nearnullvecs;
1382   Vec               *localnearnullsp;
1383   PetscBool         nnsp_has_cnst;
1384   PetscInt          nnsp_size;
1385   PetscScalar       *array;
1386   /* BLAS integers */
1387   PetscBLASInt      lwork,lierr;
1388   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1389   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1390   /* LAPACK working arrays for SVD or POD */
1391   PetscBool         skip_lapack;
1392   PetscScalar       *work;
1393   PetscReal         *singular_vals;
1394 #if defined(PETSC_USE_COMPLEX)
1395   PetscReal         *rwork;
1396 #endif
1397 #if defined(PETSC_MISSING_LAPACK_GESVD)
1398   PetscBLASInt      Blas_one_2=1;
1399   PetscScalar       *temp_basis,*correlation_mat;
1400 #else
1401   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1402   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1403 #endif
1404   /* reuse */
1405   PetscInt          olocal_primal_size;
1406   PetscInt          *oprimal_indices_local_idxs;
1407   /* change of basis */
1408   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1409   PetscBool         boolforchange,qr_needed;
1410   PetscBT           touched,change_basis,qr_needed_idx;
1411   /* auxiliary stuff */
1412   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1413   /* some quantities */
1414   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1415   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1416 
1417 
1418   PetscFunctionBegin;
1419   /* Destroy Mat objects computed previously */
1420   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1421   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1422   /* Get index sets for faces, edges and vertices from graph */
1423   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1424     pcbddc->use_vertices = PETSC_TRUE;
1425   }
1426   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1427   /* HACK: provide functions to set change of basis */
1428   if (!ISForVertices && pcbddc->NullSpace) {
1429     pcbddc->use_change_of_basis = PETSC_TRUE;
1430     pcbddc->use_change_on_faces = PETSC_FALSE;
1431   }
1432   /* print some info */
1433   if (pcbddc->dbg_flag) {
1434     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1435     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1436     i = 0;
1437     if (ISForVertices) {
1438       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1439     }
1440     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1441     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1442     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1443     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1444   }
1445   /* check if near null space is attached to global mat */
1446   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1447   if (nearnullsp) {
1448     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1449     /* remove any stored info */
1450     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1451     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1452     /* store information for BDDC solver reuse */
1453     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1454     pcbddc->onearnullspace = nearnullsp;
1455     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1456     for (i=0;i<nnsp_size;i++) {
1457       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1458     }
1459   } else { /* if near null space is not provided BDDC uses constants by default */
1460     nnsp_size = 0;
1461     nnsp_has_cnst = PETSC_TRUE;
1462   }
1463   /* get max number of constraints on a single cc */
1464   max_constraints = nnsp_size;
1465   if (nnsp_has_cnst) max_constraints++;
1466 
1467   /*
1468        Evaluate maximum storage size needed by the procedure
1469        - temp_indices will contain start index of each constraint stored as follows
1470        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1471        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1472        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1473                                                                                                                                                          */
1474   total_counts = n_ISForFaces+n_ISForEdges;
1475   total_counts *= max_constraints;
1476   n_vertices = 0;
1477   if (ISForVertices) {
1478     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1479   }
1480   total_counts += n_vertices;
1481   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1482   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1483   total_counts = 0;
1484   max_size_of_constraint = 0;
1485   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1486     if (i<n_ISForEdges) {
1487       used_IS = &ISForEdges[i];
1488     } else {
1489       used_IS = &ISForFaces[i-n_ISForEdges];
1490     }
1491     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1492     total_counts += j;
1493     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1494   }
1495   total_counts *= max_constraints;
1496   total_counts += n_vertices;
1497   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1498   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1499   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1500   /* get local part of global near null space vectors */
1501   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1502   for (k=0;k<nnsp_size;k++) {
1503     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1504     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506   }
1507 
1508   /* whether or not to skip lapack calls */
1509   skip_lapack = PETSC_TRUE;
1510   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1511 
1512   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1513   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1514     PetscScalar temp_work;
1515 #if defined(PETSC_MISSING_LAPACK_GESVD)
1516     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1517     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1518     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1519     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1520 #if defined(PETSC_USE_COMPLEX)
1521     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1522 #endif
1523     /* now we evaluate the optimal workspace using query with lwork=-1 */
1524     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1525     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1526     lwork = -1;
1527     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1528 #if !defined(PETSC_USE_COMPLEX)
1529     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1530 #else
1531     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1532 #endif
1533     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1534     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1535 #else /* on missing GESVD */
1536     /* SVD */
1537     PetscInt max_n,min_n;
1538     max_n = max_size_of_constraint;
1539     min_n = max_constraints;
1540     if (max_size_of_constraint < max_constraints) {
1541       min_n = max_size_of_constraint;
1542       max_n = max_constraints;
1543     }
1544     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1545 #if defined(PETSC_USE_COMPLEX)
1546     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1547 #endif
1548     /* now we evaluate the optimal workspace using query with lwork=-1 */
1549     lwork = -1;
1550     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1551     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1552     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1553     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1554 #if !defined(PETSC_USE_COMPLEX)
1555     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1556 #else
1557     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1558 #endif
1559     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1560     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1561 #endif /* on missing GESVD */
1562     /* Allocate optimal workspace */
1563     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1564     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1565   }
1566   /* Now we can loop on constraining sets */
1567   total_counts = 0;
1568   temp_indices[0] = 0;
1569   /* vertices */
1570   if (ISForVertices) {
1571     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1572     if (nnsp_has_cnst) { /* consider all vertices */
1573       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1574       for (i=0;i<n_vertices;i++) {
1575         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1576         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1577         total_counts++;
1578       }
1579     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1580       PetscBool used_vertex;
1581       for (i=0;i<n_vertices;i++) {
1582         used_vertex = PETSC_FALSE;
1583         k = 0;
1584         while (!used_vertex && k<nnsp_size) {
1585           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1586           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1587             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1588             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1589             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1590             total_counts++;
1591             used_vertex = PETSC_TRUE;
1592           }
1593           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1594           k++;
1595         }
1596       }
1597     }
1598     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1599     n_vertices = total_counts;
1600   }
1601 
1602   /* edges and faces */
1603   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1604     if (i<n_ISForEdges) {
1605       used_IS = &ISForEdges[i];
1606       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1607     } else {
1608       used_IS = &ISForFaces[i-n_ISForEdges];
1609       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1610     }
1611     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1612     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1613     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1614     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1615     /* change of basis should not be performed on local periodic nodes */
1616     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1617     if (nnsp_has_cnst) {
1618       PetscScalar quad_value;
1619       temp_constraints++;
1620       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1621       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1622       for (j=0;j<size_of_constraint;j++) {
1623         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1624       }
1625       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1626       total_counts++;
1627     }
1628     for (k=0;k<nnsp_size;k++) {
1629       PetscReal real_value;
1630       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1631       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1632       for (j=0;j<size_of_constraint;j++) {
1633         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1634       }
1635       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1636       /* check if array is null on the connected component */
1637       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1638       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1639       if (real_value > 0.0) { /* keep indices and values */
1640         temp_constraints++;
1641         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1642         total_counts++;
1643       }
1644     }
1645     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1646     valid_constraints = temp_constraints;
1647     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1648     if (!pcbddc->use_nnsp_true && temp_constraints) {
1649       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1650 
1651 #if defined(PETSC_MISSING_LAPACK_GESVD)
1652       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1653          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1654          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1655             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1656             from that computed using LAPACKgesvd
1657          -> This is due to a different computation of eigenvectors in LAPACKheev
1658          -> The quality of the POD-computed basis will be the same */
1659       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1660       /* Store upper triangular part of correlation matrix */
1661       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1662       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1663       for (j=0;j<temp_constraints;j++) {
1664         for (k=0;k<j+1;k++) {
1665           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1666         }
1667       }
1668       /* compute eigenvalues and eigenvectors of correlation matrix */
1669       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1670       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1671 #if !defined(PETSC_USE_COMPLEX)
1672       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1673 #else
1674       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1675 #endif
1676       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1677       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1678       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1679       j=0;
1680       while (j < temp_constraints && singular_vals[j] < tol) j++;
1681       total_counts=total_counts-j;
1682       valid_constraints = temp_constraints-j;
1683       /* scale and copy POD basis into used quadrature memory */
1684       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1685       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1686       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1687       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1688       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1689       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1690       if (j<temp_constraints) {
1691         PetscInt ii;
1692         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1693         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1694         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1695         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1696         for (k=0;k<temp_constraints-j;k++) {
1697           for (ii=0;ii<size_of_constraint;ii++) {
1698             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1699           }
1700         }
1701       }
1702 #else  /* on missing GESVD */
1703       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1704       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1705       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1706       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1707 #if !defined(PETSC_USE_COMPLEX)
1708       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1709 #else
1710       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1711 #endif
1712       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1713       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1714       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1715       k = temp_constraints;
1716       if (k > size_of_constraint) k = size_of_constraint;
1717       j = 0;
1718       while (j < k && singular_vals[k-j-1] < tol) j++;
1719       total_counts = total_counts-temp_constraints+k-j;
1720       valid_constraints = k-j;
1721 #endif /* on missing GESVD */
1722     }
1723     /* setting change_of_basis flag is safe now */
1724     if (boolforchange) {
1725       for (j=0;j<valid_constraints;j++) {
1726         PetscBTSet(change_basis,total_counts-j-1);
1727       }
1728     }
1729   }
1730   /* free index sets of faces, edges and vertices */
1731   for (i=0;i<n_ISForFaces;i++) {
1732     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1733   }
1734   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1735   for (i=0;i<n_ISForEdges;i++) {
1736     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1737   }
1738   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1739   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1740   /* map temp_indices_to_constraint in boundary numbering */
1741   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1742   if (i != temp_indices[total_counts]) {
1743     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1744   }
1745 
1746   /* free workspace */
1747   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1748     ierr = PetscFree(work);CHKERRQ(ierr);
1749 #if defined(PETSC_USE_COMPLEX)
1750     ierr = PetscFree(rwork);CHKERRQ(ierr);
1751 #endif
1752     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1753 #if defined(PETSC_MISSING_LAPACK_GESVD)
1754     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1755     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1756 #endif
1757   }
1758   for (k=0;k<nnsp_size;k++) {
1759     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1760   }
1761   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1762 
1763   /* set quantities in pcbddc data structure and store previous primal size */
1764   /* n_vertices defines the number of subdomain corners in the primal space */
1765   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1766   olocal_primal_size = pcbddc->local_primal_size;
1767   pcbddc->local_primal_size = total_counts;
1768   pcbddc->n_vertices = n_vertices;
1769   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1770 
1771   /* Create constraint matrix */
1772   /* The constraint matrix is used to compute the l2g map of primal dofs */
1773   /* so we need to set it up properly either with or without change of basis */
1774   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1775   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1776   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1777   /* array to compute a local numbering of constraints : vertices first then constraints */
1778   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1779   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1780   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1781   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1782   /* auxiliary stuff for basis change */
1783   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1784   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1785 
1786   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1787   total_primal_vertices=0;
1788   for (i=0;i<pcbddc->local_primal_size;i++) {
1789     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1790     if (size_of_constraint == 1) {
1791       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1792       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1793       aux_primal_minloc[total_primal_vertices]=0;
1794       total_primal_vertices++;
1795     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1796       PetscInt min_loc,min_index;
1797       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1798       /* find first untouched local node */
1799       k = 0;
1800       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1801       min_index = global_indices[k];
1802       min_loc = k;
1803       /* search the minimum among global nodes already untouched on the cc */
1804       for (k=1;k<size_of_constraint;k++) {
1805         /* there can be more than one constraint on a single connected component */
1806         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1807           min_index = global_indices[k];
1808           min_loc = k;
1809         }
1810       }
1811       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1812       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1813       aux_primal_minloc[total_primal_vertices]=min_loc;
1814       total_primal_vertices++;
1815     }
1816   }
1817   /* determine if a QR strategy is needed for change of basis */
1818   qr_needed = PETSC_FALSE;
1819   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1820   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1821     if (PetscBTLookup(change_basis,i)) {
1822       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1823       j = 0;
1824       for (k=0;k<size_of_constraint;k++) {
1825         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1826           j++;
1827         }
1828       }
1829       /* found more than one primal dof on the cc */
1830       if (j > 1) {
1831         PetscBTSet(qr_needed_idx,i);
1832         qr_needed = PETSC_TRUE;
1833       }
1834     }
1835   }
1836   /* free workspace */
1837   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1838 
1839   /* permute indices in order to have a sorted set of vertices */
1840   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1841 
1842   /* nonzero structure of constraint matrix */
1843   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1844   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1845   j=total_primal_vertices;
1846   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1847     if (!PetscBTLookup(change_basis,i)) {
1848       nnz[j]=temp_indices[i+1]-temp_indices[i];
1849       j++;
1850     }
1851   }
1852   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1853   ierr = PetscFree(nnz);CHKERRQ(ierr);
1854   /* set values in constraint matrix */
1855   for (i=0;i<total_primal_vertices;i++) {
1856     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1857   }
1858   total_counts = total_primal_vertices;
1859   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1860     if (!PetscBTLookup(change_basis,i)) {
1861       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1862       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1863       total_counts++;
1864     }
1865   }
1866   /* assembling */
1867   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1868   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1869   /*
1870   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1871   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1872   */
1873   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1874   if (pcbddc->use_change_of_basis) {
1875     /* dual and primal dofs on a single cc */
1876     PetscInt     dual_dofs,primal_dofs;
1877     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1878     PetscInt     primal_counter;
1879     /* working stuff for GEQRF */
1880     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1881     PetscBLASInt lqr_work;
1882     /* working stuff for UNGQR */
1883     PetscScalar  *gqr_work,lgqr_work_t;
1884     PetscBLASInt lgqr_work;
1885     /* working stuff for TRTRS */
1886     PetscScalar  *trs_rhs;
1887     PetscBLASInt Blas_NRHS;
1888     /* pointers for values insertion into change of basis matrix */
1889     PetscInt     *start_rows,*start_cols;
1890     PetscScalar  *start_vals;
1891     /* working stuff for values insertion */
1892     PetscBT      is_primal;
1893 
1894     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1895     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1896     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1897     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1898     /* work arrays */
1899     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1900     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1901     /* nonzeros per row */
1902     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1903       if (PetscBTLookup(change_basis,i)) {
1904         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1905         if (PetscBTLookup(qr_needed_idx,i)) {
1906           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1907         } else {
1908           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1909           /* get local primal index on the cc */
1910           j = 0;
1911           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1912           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1913         }
1914       }
1915     }
1916     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1917     ierr = PetscFree(nnz);CHKERRQ(ierr);
1918     /* Set initial identity in the matrix */
1919     for (i=0;i<pcis->n_B;i++) {
1920       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1921     }
1922 
1923     if (pcbddc->dbg_flag) {
1924       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1925       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1926     }
1927 
1928 
1929     /* Now we loop on the constraints which need a change of basis */
1930     /*
1931        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1932        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1933 
1934        Basic blocks of change of basis matrix T computed
1935 
1936           - Using the following block transformation if there is only a primal dof on the cc
1937             (in the example, primal dof is the last one of the edge in LOCAL ordering
1938              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1939             | 1        0   ...        0     1 |
1940             | 0        1   ...        0     1 |
1941             |              ...                |
1942             | 0        ...            1     1 |
1943             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1944 
1945           - via QR decomposition of constraints otherwise
1946     */
1947     if (qr_needed) {
1948       /* space to store Q */
1949       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1950       /* first we issue queries for optimal work */
1951       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1952       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1953       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1954       lqr_work = -1;
1955       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1956       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1957       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1958       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1959       lgqr_work = -1;
1960       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1961       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1962       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1963       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1964       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1965       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1966       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1967       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1968       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1969       /* array to store scaling factors for reflectors */
1970       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1971       /* array to store rhs and solution of triangular solver */
1972       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1973       /* allocating workspace for check */
1974       if (pcbddc->dbg_flag) {
1975         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1976       }
1977     }
1978     /* array to store whether a node is primal or not */
1979     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1980     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1981     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1982     if (i != total_primal_vertices) {
1983       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1984     }
1985     for (i=0;i<total_primal_vertices;i++) {
1986       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1987     }
1988     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1989 
1990     /* loop on constraints and see whether or not they need a change of basis and compute it */
1991     /* -> using implicit ordering contained in temp_indices data */
1992     total_counts = pcbddc->n_vertices;
1993     primal_counter = total_counts;
1994     while (total_counts<pcbddc->local_primal_size) {
1995       primal_dofs = 1;
1996       if (PetscBTLookup(change_basis,total_counts)) {
1997         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1998         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1999           primal_dofs++;
2000         }
2001         /* get constraint info */
2002         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2003         dual_dofs = size_of_constraint-primal_dofs;
2004 
2005         if (pcbddc->dbg_flag) {
2006           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2007         }
2008 
2009         if (primal_dofs > 1) { /* QR */
2010 
2011           /* copy quadrature constraints for change of basis check */
2012           if (pcbddc->dbg_flag) {
2013             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2014           }
2015           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2016           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2017 
2018           /* compute QR decomposition of constraints */
2019           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2020           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2021           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2022           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2023           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2024           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2025           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2026 
2027           /* explictly compute R^-T */
2028           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2029           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2030           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2031           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2032           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2033           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2034           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2035           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2036           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2037           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2038 
2039           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2040           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2041           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2042           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2043           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2044           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2045           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2046           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2047           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2048 
2049           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2050              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2051              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2052           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2053           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2054           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2055           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2056           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2057           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2058           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2059           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2060           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2061           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2062 
2063           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2064           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2065           /* insert cols for primal dofs */
2066           for (j=0;j<primal_dofs;j++) {
2067             start_vals = &qr_basis[j*size_of_constraint];
2068             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2069             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2070           }
2071           /* insert cols for dual dofs */
2072           for (j=0,k=0;j<dual_dofs;k++) {
2073             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2074               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2075               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2076               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2077               j++;
2078             }
2079           }
2080 
2081           /* check change of basis */
2082           if (pcbddc->dbg_flag) {
2083             PetscInt   ii,jj;
2084             PetscBool valid_qr=PETSC_TRUE;
2085             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2086             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2087             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2088             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2089             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2090             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2091             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2092             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2093             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2094             for (jj=0;jj<size_of_constraint;jj++) {
2095               for (ii=0;ii<primal_dofs;ii++) {
2096                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2097                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2098               }
2099             }
2100             if (!valid_qr) {
2101               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2102               for (jj=0;jj<size_of_constraint;jj++) {
2103                 for (ii=0;ii<primal_dofs;ii++) {
2104                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2105                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2106                   }
2107                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2108                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2109                   }
2110                 }
2111               }
2112             } else {
2113               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2114             }
2115           }
2116         } else { /* simple transformation block */
2117           PetscInt row,col;
2118           PetscScalar val;
2119           for (j=0;j<size_of_constraint;j++) {
2120             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2121             if (!PetscBTLookup(is_primal,row)) {
2122               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2123               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2124               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2125             } else {
2126               for (k=0;k<size_of_constraint;k++) {
2127                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2128                 if (row != col) {
2129                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2130                 } else {
2131                   val = 1.0;
2132                 }
2133                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2134               }
2135             }
2136           }
2137           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2138         }
2139         /* increment primal counter */
2140         primal_counter += primal_dofs;
2141       } else {
2142         if (pcbddc->dbg_flag) {
2143           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2144         }
2145       }
2146       /* increment constraint counter total_counts */
2147       total_counts += primal_dofs;
2148     }
2149 
2150     /* free workspace */
2151     if (qr_needed) {
2152       if (pcbddc->dbg_flag) {
2153         ierr = PetscFree(work);CHKERRQ(ierr);
2154       }
2155       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2156       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2157       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2158       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2159       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2160     }
2161     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2162     /* assembling */
2163     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2164     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165     /*
2166     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2167     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2168     */
2169   }
2170 
2171   /* get indices in local ordering for vertices and constraints */
2172   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2173     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2174     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2175   }
2176   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2177   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2178   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2179   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2180   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2181   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2182   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2183   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2184   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2185   /* set quantities in PCBDDC data struct */
2186   pcbddc->n_actual_vertices = i;
2187   /* check if a new primal space has been introduced */
2188   pcbddc->new_primal_space_local = PETSC_TRUE;
2189   if (olocal_primal_size == pcbddc->local_primal_size) {
2190     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2191     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2192     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2193   }
2194   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2195   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2196 
2197   /* flush dbg viewer */
2198   if (pcbddc->dbg_flag) {
2199     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2200   }
2201 
2202   /* free workspace */
2203   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2204   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2205   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2206   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2207   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2208   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2209   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2210   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2216 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2217 {
2218   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2219   PC_IS       *pcis = (PC_IS*)pc->data;
2220   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2221   PetscInt    ierr,i,vertex_size;
2222   PetscViewer viewer=pcbddc->dbg_viewer;
2223 
2224   PetscFunctionBegin;
2225   /* Reset previously computed graph */
2226   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2227   /* Init local Graph struct */
2228   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2229 
2230   /* Check validity of the csr graph passed in by the user */
2231   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2232     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2233   }
2234 
2235   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2236   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2237     Mat mat_adj;
2238     const PetscInt *xadj,*adjncy;
2239     PetscBool flg_row=PETSC_TRUE;
2240 
2241     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2242     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2243     if (!flg_row) {
2244       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2245     }
2246     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2247     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2248     if (!flg_row) {
2249       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2250     }
2251     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2252   }
2253 
2254   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2255   vertex_size = 1;
2256   if (pcbddc->user_provided_isfordofs) {
2257     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2258       ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2259       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2260         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2261         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2262       }
2263       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2264       pcbddc->n_ISForDofs = 0;
2265       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2266     }
2267     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2268     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2269   } else {
2270     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2271       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2272       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2273       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2274         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2275       }
2276     }
2277   }
2278 
2279   /* Setup of Graph */
2280   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2281     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2282   }
2283   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2284     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2285   }
2286   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2287 
2288   /* Graph's connected components analysis */
2289   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2290 
2291   /* print some info to stdout */
2292   if (pcbddc->dbg_flag) {
2293     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2294   }
2295 
2296   /* mark topography has done */
2297   pcbddc->recompute_topography = PETSC_FALSE;
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2303 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2304 {
2305   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2306   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   n = 0;
2311   vertices = 0;
2312   if (pcbddc->ConstraintMatrix) {
2313     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2314     for (i=0;i<local_primal_size;i++) {
2315       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2316       if (size_of_constraint == 1) n++;
2317       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2318     }
2319     if (vertices_idx) {
2320       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2321       n = 0;
2322       for (i=0;i<local_primal_size;i++) {
2323         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2324         if (size_of_constraint == 1) {
2325           vertices[n++]=row_cmat_indices[0];
2326         }
2327         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2328       }
2329     }
2330   }
2331   *n_vertices = n;
2332   if (vertices_idx) *vertices_idx = vertices;
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2338 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2339 {
2340   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2341   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2342   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2343   PetscBT        touched;
2344   PetscErrorCode ierr;
2345 
2346     /* This function assumes that the number of local constraints per connected component
2347        is not greater than the number of nodes defined for the connected component
2348        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2349   PetscFunctionBegin;
2350   n = 0;
2351   constraints_index = 0;
2352   if (pcbddc->ConstraintMatrix) {
2353     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2354     max_size_of_constraint = 0;
2355     for (i=0;i<local_primal_size;i++) {
2356       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2357       if (size_of_constraint > 1) {
2358         n++;
2359       }
2360       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2361       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2362     }
2363     if (constraints_idx) {
2364       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2365       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2366       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2367       n = 0;
2368       for (i=0;i<local_primal_size;i++) {
2369         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2370         if (size_of_constraint > 1) {
2371           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2372           /* find first untouched local node */
2373           j = 0;
2374           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2375           min_index = row_cmat_global_indices[j];
2376           min_loc = j;
2377           /* search the minimum among nodes not yet touched on the connected component
2378              since there can be more than one constraint on a single cc */
2379           for (j=1;j<size_of_constraint;j++) {
2380             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2381               min_index = row_cmat_global_indices[j];
2382               min_loc = j;
2383             }
2384           }
2385           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2386           constraints_index[n++] = row_cmat_indices[min_loc];
2387         }
2388         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2389       }
2390       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2391       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2392     }
2393   }
2394   *n_constraints = n;
2395   if (constraints_idx) *constraints_idx = constraints_index;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 /* the next two functions has been adapted from pcis.c */
2400 #undef __FUNCT__
2401 #define __FUNCT__ "PCBDDCApplySchur"
2402 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2403 {
2404   PetscErrorCode ierr;
2405   PC_IS          *pcis = (PC_IS*)(pc->data);
2406 
2407   PetscFunctionBegin;
2408   if (!vec2_B) { vec2_B = v; }
2409   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2410   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2411   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2412   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2413   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2419 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2420 {
2421   PetscErrorCode ierr;
2422   PC_IS          *pcis = (PC_IS*)(pc->data);
2423 
2424   PetscFunctionBegin;
2425   if (!vec2_B) { vec2_B = v; }
2426   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2427   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2428   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2429   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2430   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "PCBDDCSubsetNumbering"
2436 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2437 {
2438   Vec            local_vec,global_vec;
2439   IS             seqis,paris;
2440   VecScatter     scatter_ctx;
2441   PetscScalar    *array;
2442   PetscInt       *temp_global_dofs;
2443   PetscScalar    globalsum;
2444   PetscInt       i,j,s;
2445   PetscInt       nlocals,first_index,old_index,max_local;
2446   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2447   PetscMPIInt    *dof_sizes,*dof_displs;
2448   PetscBool      first_found;
2449   PetscErrorCode ierr;
2450 
2451   PetscFunctionBegin;
2452   /* mpi buffers */
2453   MPI_Comm_size(comm,&size_prec_comm);
2454   MPI_Comm_rank(comm,&rank_prec_comm);
2455   j = ( !rank_prec_comm ? size_prec_comm : 0);
2456   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2457   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2458   /* get maximum size of subset */
2459   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2460   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2461   max_local = 0;
2462   if (n_local_dofs) {
2463     max_local = temp_global_dofs[0];
2464     for (i=1;i<n_local_dofs;i++) {
2465       if (max_local < temp_global_dofs[i] ) {
2466         max_local = temp_global_dofs[i];
2467       }
2468     }
2469   }
2470   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2471   max_global++;
2472   max_local = 0;
2473   if (n_local_dofs) {
2474     max_local = local_dofs[0];
2475     for (i=1;i<n_local_dofs;i++) {
2476       if (max_local < local_dofs[i] ) {
2477         max_local = local_dofs[i];
2478       }
2479     }
2480   }
2481   max_local++;
2482   /* allocate workspace */
2483   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2484   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2485   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2486   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2487   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2488   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2489   /* create scatter */
2490   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2491   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2492   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2493   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2494   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2495   /* init array */
2496   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2497   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2498   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2499   if (local_dofs_mult) {
2500     for (i=0;i<n_local_dofs;i++) {
2501       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2502     }
2503   } else {
2504     for (i=0;i<n_local_dofs;i++) {
2505       array[local_dofs[i]]=1.0;
2506     }
2507   }
2508   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2509   /* scatter into global vec and get total number of global dofs */
2510   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2511   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2512   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2513   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2514   /* Fill global_vec with cumulative function for global numbering */
2515   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2516   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2517   nlocals = 0;
2518   first_index = -1;
2519   first_found = PETSC_FALSE;
2520   for (i=0;i<s;i++) {
2521     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2522       first_found = PETSC_TRUE;
2523       first_index = i;
2524     }
2525     nlocals += (PetscInt)PetscRealPart(array[i]);
2526   }
2527   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2528   if (!rank_prec_comm) {
2529     dof_displs[0]=0;
2530     for (i=1;i<size_prec_comm;i++) {
2531       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2532     }
2533   }
2534   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2535   if (first_found) {
2536     array[first_index] += (PetscScalar)nlocals;
2537     old_index = first_index;
2538     for (i=first_index+1;i<s;i++) {
2539       if (PetscRealPart(array[i]) > 0.0) {
2540         array[i] += array[old_index];
2541         old_index = i;
2542       }
2543     }
2544   }
2545   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2546   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2547   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2548   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2549   /* get global ordering of local dofs */
2550   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2551   if (local_dofs_mult) {
2552     for (i=0;i<n_local_dofs;i++) {
2553       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2554     }
2555   } else {
2556     for (i=0;i<n_local_dofs;i++) {
2557       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2558     }
2559   }
2560   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2561   /* free workspace */
2562   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2563   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2564   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2565   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2566   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2567   /* return pointer to global ordering of local dofs */
2568   *global_numbering_subset = temp_global_dofs;
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2574 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2575 {
2576   PetscInt       i,j;
2577   PetscScalar    *alphas;
2578   PetscErrorCode ierr;
2579 
2580   PetscFunctionBegin;
2581   /* this implements stabilized Gram-Schmidt */
2582   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2583   for (i=0;i<n;i++) {
2584     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2585     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2586     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2587   }
2588   ierr = PetscFree(alphas);CHKERRQ(ierr);
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 /* TODO
2593    - now preallocation is done assuming SEQDENSE local matrices
2594 */
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatISGetMPIXAIJ"
2597 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2598 {
2599   Mat                    new_mat;
2600   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2601   /* info on mat */
2602   /* ISLocalToGlobalMapping rmapping,cmapping; */
2603   PetscInt               bs,rows,cols;
2604   PetscInt               lrows,lcols;
2605   PetscInt               local_rows,local_cols;
2606   PetscBool              isdense;
2607   /* values insertion */
2608   PetscScalar            *array;
2609   PetscInt               *local_indices,*global_indices;
2610   /* work */
2611   PetscInt               i,j,index_row;
2612   PetscErrorCode         ierr;
2613 
2614   PetscFunctionBegin;
2615   /* MISSING CHECKS
2616     - rectangular case not covered (it is not allowed by MATIS)
2617   */
2618   /* get info from mat */
2619   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2620   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2621   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2622   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2623 
2624   /* work */
2625   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2626   for (i=0;i<local_rows;i++) local_indices[i]=i;
2627   /* map indices of local mat to global values */
2628   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2629   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2630   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2631 
2632   if (reuse==MAT_INITIAL_MATRIX) {
2633     Vec         vec_dnz,vec_onz;
2634     PetscScalar *my_dnz,*my_onz;
2635     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2636     PetscInt    index_col,owner;
2637     PetscMPIInt nsubdomains;
2638 
2639     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2640     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2641     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2642     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2643     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2644     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2645     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2646 
2647     /*
2648       preallocation
2649     */
2650 
2651     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2652     /*
2653        Some vectors are needed to sum up properly on shared interface dofs.
2654        Preallocation macros cannot do the job.
2655        Note that preallocation is not exact, since it overestimates nonzeros
2656     */
2657     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2658     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2659     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2660     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2661     /* All processes need to compute entire row ownership */
2662     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2663     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2664     for (i=0;i<nsubdomains;i++) {
2665       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2666         row_ownership[j]=i;
2667       }
2668     }
2669 
2670     /*
2671        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2672        then, they will be summed up properly. This way, preallocation is always sufficient
2673     */
2674     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2675     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2676     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2677     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2678     for (i=0;i<local_rows;i++) {
2679       index_row = global_indices[i];
2680       for (j=i;j<local_rows;j++) {
2681         owner = row_ownership[index_row];
2682         index_col = global_indices[j];
2683         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2684           my_dnz[i] += 1.0;
2685         } else { /* offdiag block */
2686           my_onz[i] += 1.0;
2687         }
2688         /* same as before, interchanging rows and cols */
2689         if (i != j) {
2690           owner = row_ownership[index_col];
2691           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2692             my_dnz[j] += 1.0;
2693           } else {
2694             my_onz[j] += 1.0;
2695           }
2696         }
2697       }
2698     }
2699     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2700     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2701     if (local_rows) { /* multilevel guard */
2702       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2703       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2704     }
2705     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2706     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2707     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2708     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2709     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2710     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2711     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2712 
2713     /* set computed preallocation in dnz and onz */
2714     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2715     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2716     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2717     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2718     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2719     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2720     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2721     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2722 
2723     /* Resize preallocation if overestimated */
2724     for (i=0;i<lrows;i++) {
2725       dnz[i] = PetscMin(dnz[i],lcols);
2726       onz[i] = PetscMin(onz[i],cols-lcols);
2727     }
2728     /* set preallocation */
2729     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2730     for (i=0;i<lrows/bs;i++) {
2731       dnz[i] = dnz[i*bs]/bs;
2732       onz[i] = onz[i*bs]/bs;
2733     }
2734     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2735     for (i=0;i<lrows/bs;i++) {
2736       dnz[i] = dnz[i]-i;
2737     }
2738     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2739     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2740     *M = new_mat;
2741   } else {
2742     PetscInt mbs,mrows,mcols;
2743     /* some checks */
2744     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2745     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2746     if (mrows != rows) {
2747       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2748     }
2749     if (mrows != rows) {
2750       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2751     }
2752     if (mbs != bs) {
2753       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2754     }
2755     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2756   }
2757   /* set local to global mappings */
2758   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2759   /* Set values */
2760   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2761   if (isdense) { /* special case for dense local matrices */
2762     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2763     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2764     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2765     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2766     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2767     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2768   } else { /* very basic values insertion for all other matrix types */
2769     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2770     for (i=0;i<local_rows;i++) {
2771       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2772       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2773       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2774       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2775       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2776       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2777     }
2778     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2779   }
2780   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2781   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782   if (isdense) {
2783     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2784   }
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatISGetSubassemblingPattern"
2790 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2791 {
2792   Mat             subdomain_adj;
2793   IS              new_ranks,ranks_send_to;
2794   MatPartitioning partitioner;
2795   Mat_IS          *matis;
2796   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2797   PetscInt        prank;
2798   PetscMPIInt     size,rank,color;
2799   PetscInt        *xadj,*adjncy,*oldranks;
2800   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2801   PetscInt        i,j,local_size,threshold=0;
2802   PetscErrorCode  ierr;
2803   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2804   PetscSubcomm    subcomm;
2805 
2806   PetscFunctionBegin;
2807   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2808   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2809   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2810 
2811   /* Get info on mapping */
2812   matis = (Mat_IS*)(mat->data);
2813   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2814   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2815 
2816   /* build local CSR graph of subdomains' connectivity */
2817   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2818   xadj[0] = 0;
2819   xadj[1] = PetscMax(n_neighs-1,0);
2820   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2821   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2822 
2823   if (threshold) {
2824     PetscInt* count,min_threshold;
2825     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2826     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2827     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2828       for (j=0;j<n_shared[i];j++) {
2829         count[shared[i][j]] += 1;
2830       }
2831     }
2832     /* adapt threshold since we dont want to lose significant connections */
2833     min_threshold = n_neighs;
2834     for (i=1;i<n_neighs;i++) {
2835       for (j=0;j<n_shared[i];j++) {
2836         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2837       }
2838     }
2839     threshold = PetscMax(min_threshold+1,threshold);
2840     xadj[1] = 0;
2841     for (i=1;i<n_neighs;i++) {
2842       for (j=0;j<n_shared[i];j++) {
2843         if (count[shared[i][j]] < threshold) {
2844           adjncy[xadj[1]] = neighs[i];
2845           adjncy_wgt[xadj[1]] = n_shared[i];
2846           xadj[1]++;
2847           break;
2848         }
2849       }
2850     }
2851     ierr = PetscFree(count);CHKERRQ(ierr);
2852   } else {
2853     if (xadj[1]) {
2854       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2855       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2856     }
2857   }
2858   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2859   if (use_square) {
2860     for (i=0;i<xadj[1];i++) {
2861       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2862     }
2863   }
2864   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2865 
2866   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2867 
2868   /*
2869     Restrict work on active processes only.
2870   */
2871   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2872   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2873   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2874   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2875   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2876   if (color) {
2877     ierr = PetscFree(xadj);CHKERRQ(ierr);
2878     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2879     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2880   } else {
2881     PetscInt coarsening_ratio;
2882     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2883     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2884     prank = rank;
2885     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2886     /*
2887     for (i=0;i<size;i++) {
2888       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2889     }
2890     */
2891     for (i=0;i<xadj[1];i++) {
2892       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2893     }
2894     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2895     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2896     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2897 
2898     /* Partition */
2899     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2900     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2901     if (use_vwgt) {
2902       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2903       v_wgt[0] = local_size;
2904       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2905     }
2906     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2907     coarsening_ratio = size/n_subdomains;
2908     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2909     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2910     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2911     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2912     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2913 
2914     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2915     if (contiguous) {
2916       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2917     } else {
2918       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2919     }
2920     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2921     /* clean up */
2922     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2923     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2924     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2925     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2926   }
2927   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2928 
2929   /* assemble parallel IS for sends */
2930   i = 1;
2931   if (color) i=0;
2932   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2933 
2934   /* get back IS */
2935   *is_sends = ranks_send_to;
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2940 
2941 #undef __FUNCT__
2942 #define __FUNCT__ "MatISSubassemble"
2943 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2944 {
2945   Mat                    local_mat;
2946   Mat_IS                 *matis;
2947   IS                     is_sends_internal;
2948   PetscInt               rows,cols;
2949   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2950   PetscBool              ismatis,isdense,destroy_mat;
2951   ISLocalToGlobalMapping l2gmap;
2952   PetscInt*              l2gmap_indices;
2953   const PetscInt*        is_indices;
2954   MatType                new_local_type;
2955   /* buffers */
2956   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2957   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2958   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2959   /* MPI */
2960   MPI_Comm               comm,comm_n;
2961   PetscSubcomm           subcomm;
2962   PetscMPIInt            n_sends,n_recvs,commsize;
2963   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2964   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2965   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2966   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2967   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2968   PetscErrorCode         ierr;
2969 
2970   PetscFunctionBegin;
2971   /* TODO: add missing checks */
2972   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2973   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2974   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2975   PetscValidLogicalCollectiveInt(mat,nis,7);
2976   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2977   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2978   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2979   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2980   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2981   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2982   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2983   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2984     PetscInt mrows,mcols,mnrows,mncols;
2985     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2986     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2987     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2988     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2989     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2990     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2991   }
2992   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2993   PetscValidLogicalCollectiveInt(mat,bs,0);
2994   /* prepare IS for sending if not provided */
2995   if (!is_sends) {
2996     PetscBool pcontig = PETSC_TRUE;
2997     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
2998     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
2999   } else {
3000     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3001     is_sends_internal = is_sends;
3002   }
3003 
3004   /* get pointer of MATIS data */
3005   matis = (Mat_IS*)mat->data;
3006 
3007   /* get comm */
3008   comm = PetscObjectComm((PetscObject)mat);
3009 
3010   /* compute number of sends */
3011   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3012   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3013 
3014   /* compute number of receives */
3015   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3016   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
3017   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3018   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3019   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3020   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3021   ierr = PetscFree(iflags);CHKERRQ(ierr);
3022 
3023   /* restrict comm if requested */
3024   subcomm = 0;
3025   destroy_mat = PETSC_FALSE;
3026   if (restrict_comm) {
3027     PetscMPIInt color,rank,subcommsize;
3028     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3029     color = 0;
3030     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3031     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3032     subcommsize = commsize - subcommsize;
3033     /* check if reuse has been requested */
3034     if (reuse == MAT_REUSE_MATRIX) {
3035       if (*mat_n) {
3036         PetscMPIInt subcommsize2;
3037         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3038         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3039         comm_n = PetscObjectComm((PetscObject)*mat_n);
3040       } else {
3041         comm_n = PETSC_COMM_SELF;
3042       }
3043     } else { /* MAT_INITIAL_MATRIX */
3044       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3045       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3046       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3047       comm_n = subcomm->comm;
3048     }
3049     /* flag to destroy *mat_n if not significative */
3050     if (color) destroy_mat = PETSC_TRUE;
3051   } else {
3052     comm_n = comm;
3053   }
3054 
3055   /* prepare send/receive buffers */
3056   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
3057   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3058   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
3059   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3060   if (nis) {
3061     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3062     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3063   }
3064 
3065   /* Get data from local matrices */
3066   if (!isdense) {
3067     /* TODO: See below some guidelines on how to prepare the local buffers */
3068     /*
3069        send_buffer_vals should contain the raw values of the local matrix
3070        send_buffer_idxs should contain:
3071        - MatType_PRIVATE type
3072        - PetscInt        size_of_l2gmap
3073        - PetscInt        global_row_indices[size_of_l2gmap]
3074        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3075     */
3076   } else {
3077     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3078     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3079     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
3080     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3081     send_buffer_idxs[1] = i;
3082     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3083     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3084     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3085     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3086     for (i=0;i<n_sends;i++) {
3087       ilengths_vals[is_indices[i]] = len*len;
3088       ilengths_idxs[is_indices[i]] = len+2;
3089     }
3090   }
3091   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3092   /* additional is (if any) */
3093   if (nis) {
3094     PetscMPIInt psum;
3095     PetscInt j;
3096     for (j=0,psum=0;j<nis;j++) {
3097       PetscInt plen;
3098       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3099       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3100       psum += len+1; /* indices + lenght */
3101     }
3102     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3103     for (j=0,psum=0;j<nis;j++) {
3104       PetscInt plen;
3105       const PetscInt *is_array_idxs;
3106       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3107       send_buffer_idxs_is[psum] = plen;
3108       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3109       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3110       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3111       psum += plen+1; /* indices + lenght */
3112     }
3113     for (i=0;i<n_sends;i++) {
3114       ilengths_idxs_is[is_indices[i]] = psum;
3115     }
3116     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3117   }
3118 
3119   buf_size_idxs = 0;
3120   buf_size_vals = 0;
3121   buf_size_idxs_is = 0;
3122   for (i=0;i<n_recvs;i++) {
3123     buf_size_idxs += (PetscInt)olengths_idxs[i];
3124     buf_size_vals += (PetscInt)olengths_vals[i];
3125     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3126   }
3127   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
3128   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
3129   ierr = PetscMalloc(buf_size_idxs_is*sizeof(PetscInt),&recv_buffer_idxs_is);CHKERRQ(ierr);
3130 
3131   /* get new tags for clean communications */
3132   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3133   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3134   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3135 
3136   /* allocate for requests */
3137   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
3138   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
3139   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs_is);CHKERRQ(ierr);
3140   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
3141   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
3142   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs_is);CHKERRQ(ierr);
3143 
3144   /* communications */
3145   ptr_idxs = recv_buffer_idxs;
3146   ptr_vals = recv_buffer_vals;
3147   ptr_idxs_is = recv_buffer_idxs_is;
3148   for (i=0;i<n_recvs;i++) {
3149     source_dest = onodes[i];
3150     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3151     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3152     ptr_idxs += olengths_idxs[i];
3153     ptr_vals += olengths_vals[i];
3154     if (nis) {
3155       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3156       ptr_idxs_is += olengths_idxs_is[i];
3157     }
3158   }
3159   for (i=0;i<n_sends;i++) {
3160     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3161     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3162     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3163     if (nis) {
3164       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3165     }
3166   }
3167   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3168   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3169 
3170   /* assemble new l2g map */
3171   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3172   ptr_idxs = recv_buffer_idxs;
3173   buf_size_idxs = 0;
3174   for (i=0;i<n_recvs;i++) {
3175     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3176     ptr_idxs += olengths_idxs[i];
3177   }
3178   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
3179   ptr_idxs = recv_buffer_idxs;
3180   buf_size_idxs = 0;
3181   for (i=0;i<n_recvs;i++) {
3182     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3183     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3184     ptr_idxs += olengths_idxs[i];
3185   }
3186   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3187   ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3188   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3189 
3190   /* infer new local matrix type from received local matrices type */
3191   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3192   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3193   if (n_recvs) {
3194     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3195     ptr_idxs = recv_buffer_idxs;
3196     for (i=0;i<n_recvs;i++) {
3197       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3198         new_local_type_private = MATAIJ_PRIVATE;
3199         break;
3200       }
3201       ptr_idxs += olengths_idxs[i];
3202     }
3203     switch (new_local_type_private) {
3204       case MATDENSE_PRIVATE:
3205         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3206           new_local_type = MATSEQAIJ;
3207           bs = 1;
3208         } else { /* if I receive only 1 dense matrix */
3209           new_local_type = MATSEQDENSE;
3210           bs = 1;
3211         }
3212         break;
3213       case MATAIJ_PRIVATE:
3214         new_local_type = MATSEQAIJ;
3215         bs = 1;
3216         break;
3217       case MATBAIJ_PRIVATE:
3218         new_local_type = MATSEQBAIJ;
3219         break;
3220       case MATSBAIJ_PRIVATE:
3221         new_local_type = MATSEQSBAIJ;
3222         break;
3223       default:
3224         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3225         break;
3226     }
3227   } else { /* by default, new_local_type is seqdense */
3228     new_local_type = MATSEQDENSE;
3229     bs = 1;
3230   }
3231 
3232   /* create MATIS object if needed */
3233   if (reuse == MAT_INITIAL_MATRIX) {
3234     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3235     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3236   } else {
3237     /* it also destroys the local matrices */
3238     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3239   }
3240   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3241   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3242   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3243   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3244 
3245   /* set values */
3246   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3247   ptr_vals = recv_buffer_vals;
3248   ptr_idxs = recv_buffer_idxs;
3249   for (i=0;i<n_recvs;i++) {
3250     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3251       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3252       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3253       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3254       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3255       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3256     } else {
3257       /* TODO */
3258     }
3259     ptr_idxs += olengths_idxs[i];
3260     ptr_vals += olengths_vals[i];
3261   }
3262   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3266 
3267 #if 1
3268   if (!restrict_comm) { /* check */
3269     Vec       lvec,rvec;
3270     PetscReal infty_error;
3271 
3272     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3273     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3274     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3275     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3276     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3277     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3278     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3279     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3280     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3281   }
3282 #endif
3283 
3284   /* assemble new additional is (if any) */
3285   if (nis) {
3286     PetscInt **temp_idxs,*count_is,j,psum;
3287 
3288     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3289     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3290     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3291     ptr_idxs = recv_buffer_idxs_is;
3292     psum = 0;
3293     for (i=0;i<n_recvs;i++) {
3294       for (j=0;j<nis;j++) {
3295         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3296         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3297         psum += plen;
3298         ptr_idxs += plen+1; /* shift pointer to received data */
3299       }
3300     }
3301     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3302     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3303     for (i=1;i<nis;i++) {
3304       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3305     }
3306     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3307     ptr_idxs = recv_buffer_idxs_is;
3308     for (i=0;i<n_recvs;i++) {
3309       for (j=0;j<nis;j++) {
3310         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3311         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3312         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3313         ptr_idxs += plen+1; /* shift pointer to received data */
3314       }
3315     }
3316     for (i=0;i<nis;i++) {
3317       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3318       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3319       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3320     }
3321     ierr = PetscFree(count_is);CHKERRQ(ierr);
3322     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3323     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3324   }
3325   /* free workspace */
3326   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3327   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3328   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3329   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3330   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3331   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3332   if (isdense) {
3333     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3334     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3335   } else {
3336     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3337   }
3338   if (nis) {
3339     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3340     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3341   }
3342   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3343   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3344   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3345   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3346   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3347   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3348   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3349   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3350   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3351   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3352   ierr = PetscFree(onodes);CHKERRQ(ierr);
3353   if (nis) {
3354     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3355     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3356     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3357   }
3358   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3359   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3360     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3361     for (i=0;i<nis;i++) {
3362       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3363     }
3364   }
3365   PetscFunctionReturn(0);
3366 }
3367 
3368 /* temporary hack into ksp private data structure */
3369 #include <petsc-private/kspimpl.h>
3370 
3371 #undef __FUNCT__
3372 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3373 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3374 {
3375   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3376   PC_IS                  *pcis = (PC_IS*)pc->data;
3377   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3378   MatNullSpace           CoarseNullSpace=NULL;
3379   ISLocalToGlobalMapping coarse_islg;
3380   IS                     coarse_is,*isarray;
3381   PetscInt               i,im_active=-1,active_procs=-1;
3382   PetscInt               nis,nisdofs,nisneu;
3383   PC                     pc_temp;
3384   PCType                 coarse_pc_type;
3385   KSPType                coarse_ksp_type;
3386   PetscBool              multilevel_requested,multilevel_allowed;
3387   PetscBool              setsym,issym,isherm,isbddc,isnn,coarse_reuse;
3388   MatStructure           matstruct;
3389   Mat                    t_coarse_mat_is;
3390   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3391   PetscMPIInt            all_procs;
3392   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3393   PetscErrorCode         ierr;
3394 
3395   PetscFunctionBegin;
3396   /* Assign global numbering to coarse dofs */
3397   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3398     PetscInt ocoarse_size;
3399     ocoarse_size = pcbddc->coarse_size;
3400     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3401     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3402     /* see if we can avoid some work */
3403     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3404       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3405         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3406         coarse_reuse = PETSC_FALSE;
3407       } else { /* we can safely reuse already computed coarse matrix */
3408         coarse_reuse = PETSC_TRUE;
3409       }
3410     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3411       coarse_reuse = PETSC_FALSE;
3412     }
3413     /* reset any subassembling information */
3414     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3415     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3416   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3417     coarse_reuse = PETSC_TRUE;
3418   }
3419 
3420   /* count "active" (i.e. with positive local size) and "void" processes */
3421   im_active = !!(pcis->n);
3422   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3423   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3424   void_procs = all_procs-active_procs;
3425   csin_type_simple = PETSC_TRUE;
3426   if (pcbddc->current_level) {
3427     csin_ml = PETSC_TRUE;
3428     ncoarse_ml = void_procs;
3429     csin_ds = PETSC_TRUE;
3430     ncoarse_ds = void_procs;
3431     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3432   } else {
3433     csin_ml = PETSC_FALSE;
3434     ncoarse_ml = all_procs;
3435     if (void_procs) {
3436       csin_ds = PETSC_TRUE;
3437       ncoarse_ds = void_procs;
3438       csin_type_simple = PETSC_FALSE;
3439     } else {
3440       csin_ds = PETSC_FALSE;
3441       ncoarse_ds = all_procs;
3442     }
3443   }
3444 
3445   /*
3446     test if we can go multilevel: three conditions must be satisfied:
3447     - we have not exceeded the number of levels requested
3448     - we can actually subassemble the active processes
3449     - we can find a suitable number of MPI processes where we can place the subassembled problem
3450   */
3451   multilevel_allowed = PETSC_FALSE;
3452   multilevel_requested = PETSC_FALSE;
3453   if (pcbddc->current_level < pcbddc->max_levels) {
3454     multilevel_requested = PETSC_TRUE;
3455     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3456       multilevel_allowed = PETSC_FALSE;
3457     } else {
3458       multilevel_allowed = PETSC_TRUE;
3459     }
3460   }
3461   /* determine number of process partecipating to coarse solver */
3462   if (multilevel_allowed) {
3463     ncoarse = ncoarse_ml;
3464     csin = csin_ml;
3465   } else {
3466     ncoarse = ncoarse_ds;
3467     csin = csin_ds;
3468   }
3469 
3470   /* creates temporary l2gmap and IS for coarse indexes */
3471   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3472   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3473 
3474   /* creates temporary MATIS object for coarse matrix */
3475   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3476   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3477   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3478   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3479   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3480   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3481 
3482   /* compute dofs splitting and neumann boundaries for coarse dofs */
3483   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3484     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3485     const PetscInt         *idxs;
3486     ISLocalToGlobalMapping tmap;
3487 
3488     /* create map between primal indices (in local representative ordering) and local primal numbering */
3489     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3490     /* allocate space for temporary storage */
3491     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3492     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3493     /* allocate for IS array */
3494     nisdofs = pcbddc->n_ISForDofsLocal;
3495     nisneu = !!pcbddc->NeumannBoundariesLocal;
3496     nis = nisdofs + nisneu;
3497     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3498     /* dofs splitting */
3499     for (i=0;i<nisdofs;i++) {
3500       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3501       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3502       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3503       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3504       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3505       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3506       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3507       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3508     }
3509     /* neumann boundaries */
3510     if (pcbddc->NeumannBoundariesLocal) {
3511       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3512       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3513       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3514       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3515       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3516       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3517       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3518       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3519     }
3520     /* free memory */
3521     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3522     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3523     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3524   } else {
3525     nis = 0;
3526     nisdofs = 0;
3527     nisneu = 0;
3528     isarray = NULL;
3529   }
3530   /* destroy no longer needed map */
3531   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3532 
3533   /* restrict on coarse candidates (if needed) */
3534   coarse_mat_is = NULL;
3535   if (csin) {
3536     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3537       PetscInt j,tissize,*nisindices;
3538       PetscInt *coarse_candidates;
3539       const PetscInt* tisindices;
3540       /* get coarse candidates' ranks in pc communicator */
3541       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3542       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3543       for (i=0,j=0;i<all_procs;i++) {
3544         if (!coarse_candidates[i]) {
3545           coarse_candidates[j]=i;
3546           j++;
3547         }
3548       }
3549       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3550       /* get a suitable subassembling pattern */
3551       if (csin_type_simple) {
3552         PetscMPIInt rank;
3553         PetscInt    issize,isidx;
3554         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3555         if (im_active) {
3556           issize = 1;
3557           isidx = (PetscInt)rank;
3558         } else {
3559           issize = 0;
3560           isidx = -1;
3561         }
3562         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3563       } else {
3564         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3565       }
3566       if (pcbddc->dbg_flag) {
3567         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3568         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3569         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3570         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3571         for (i=0;i<j;i++) {
3572           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3573         }
3574         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3575         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3576       }
3577       /* shift the pattern on coarse candidates */
3578       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3579       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3580       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3581       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3582       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3583       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3584       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3585     }
3586     if (pcbddc->dbg_flag) {
3587       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3588       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3589       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3590       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3591     }
3592     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3593     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3594   } else {
3595     if (pcbddc->dbg_flag) {
3596       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3597       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3598       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3599     }
3600     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3601     coarse_mat_is = t_coarse_mat_is;
3602   }
3603 
3604   /* create local to global scatters for coarse problem */
3605   if (pcbddc->new_primal_space) {
3606     PetscInt lrows;
3607     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3608     if (coarse_mat_is) {
3609       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3610     } else {
3611       lrows = 0;
3612     }
3613     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3614     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3615     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3616     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3617     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3618   }
3619   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3620   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3621 
3622   /* set defaults for coarse KSP and PC */
3623   if (multilevel_allowed) {
3624     coarse_ksp_type = KSPRICHARDSON;
3625     coarse_pc_type = PCBDDC;
3626   } else {
3627     coarse_ksp_type = KSPPREONLY;
3628     coarse_pc_type = PCREDUNDANT;
3629   }
3630 
3631   /* print some info if requested */
3632   if (pcbddc->dbg_flag) {
3633     if (!multilevel_allowed) {
3634       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3635       if (multilevel_requested) {
3636         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3637       } else if (pcbddc->max_levels) {
3638         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3639       }
3640       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3641     }
3642   }
3643 
3644   /* create the coarse KSP object only once with defaults */
3645   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3646   issym = PETSC_FALSE;
3647   isherm = PETSC_FALSE;
3648   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3649   ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr);
3650   if (coarse_mat_is) {
3651     MatReuse coarse_mat_reuse;
3652     PetscViewer dbg_viewer;
3653     if (pcbddc->dbg_flag) {
3654       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3655       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3656     }
3657     if (!pcbddc->coarse_ksp) {
3658       char prefix[256],str_level[16];
3659       size_t len;
3660       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3661       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3662       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3663       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr);
3664       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3665       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3666       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3667       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3668       /* prefix */
3669       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3670       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3671       if (!pcbddc->current_level) {
3672         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3673         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3674       } else {
3675         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3676         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3677         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3678         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3679         *(prefix+len)='\0';
3680         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3681         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3682       }
3683       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3684       /* allow user customization */
3685       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3686     }
3687 
3688     /* get some info after set from options */
3689     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3690     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3691     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3692     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3693       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3694       isbddc = PETSC_FALSE;
3695     }
3696 
3697     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3698     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3699     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3700     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3701     if (nisdofs) {
3702       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3703       for (i=0;i<nisdofs;i++) {
3704         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3705       }
3706     }
3707     if (nisneu) {
3708       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3709       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3710     }
3711 
3712     /* assemble coarse matrix */
3713     if (coarse_reuse) {
3714       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3715       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3716       coarse_mat_reuse = MAT_REUSE_MATRIX;
3717     } else {
3718       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3719     }
3720     if (isbddc || isnn) {
3721       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3722         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3723         if (pcbddc->dbg_flag) {
3724           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3725           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3726           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3727           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3728         }
3729       }
3730       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3731     } else {
3732       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3733     }
3734     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3735 
3736     /* propagate symmetry info to coarse matrix */
3737     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3738     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3739 
3740     /* set operators */
3741     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3742     if (pcbddc->dbg_flag) {
3743       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3744     }
3745   } else { /* processes non partecipating to coarse solver (if any) */
3746     coarse_mat = 0;
3747   }
3748   ierr = PetscFree(isarray);CHKERRQ(ierr);
3749 
3750 /* temporary disabled code */
3751 #if 0
3752   /* Compute coarse null space (special handling by BDDC only) */
3753   if (pcbddc->NullSpace) {
3754     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3755     if (isbddc) {
3756       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3757     } else {
3758       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3759     }
3760   }
3761 #endif
3762 
3763   if (pcbddc->coarse_ksp) {
3764     Vec crhs,csol;
3765     PetscBool ispreonly;
3766     /* setup coarse ksp */
3767     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3768     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3769     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3770     /* hack */
3771     if (!csol) {
3772       ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3773     }
3774     if (!crhs) {
3775       ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3776     }
3777     /* Check coarse problem if in debug mode or if solving with an iterative method */
3778     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3779     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3780       KSP       check_ksp;
3781       KSPType   check_ksp_type;
3782       PC        check_pc;
3783       Vec       check_vec,coarse_vec;
3784       PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3785       PetscInt  its;
3786       PetscBool compute_eigs;
3787       PetscReal *eigs_r,*eigs_c;
3788       PetscInt  neigs;
3789 
3790       /* Create ksp object suitable for estimation of extreme eigenvalues */
3791       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3792       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3793       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3794       if (ispreonly) {
3795         check_ksp_type = KSPPREONLY;
3796         compute_eigs = PETSC_FALSE;
3797       } else {
3798         check_ksp_type = KSPGMRES;
3799         compute_eigs = PETSC_TRUE;
3800       }
3801       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3802       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3803       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3804       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3805       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3806       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3807       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3808       /* create random vec */
3809       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3810       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3811       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3812       if (CoarseNullSpace) {
3813         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3814       }
3815       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3816       /* solve coarse problem */
3817       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3818       if (CoarseNullSpace) {
3819         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3820       }
3821       /* set eigenvalue estimation if preonly has not been requested */
3822       if (compute_eigs) {
3823         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3824         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3825         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3826         lambda_max = eigs_r[neigs-1];
3827         lambda_min = eigs_r[0];
3828         if (pcbddc->use_coarse_estimates) {
3829           if (lambda_max>lambda_min) {
3830             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3831             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3832           }
3833         }
3834       }
3835 
3836       /* check coarse problem residual error */
3837       if (pcbddc->dbg_flag) {
3838         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3839         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3840         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3841         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3842         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3843         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3844         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3845         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3846         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3847         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3848         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3849         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3850         if (compute_eigs) {
3851           PetscReal lambda_max_s,lambda_min_s;
3852           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3853           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3854           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3855           for (i=0;i<neigs;i++) {
3856             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3857           }
3858         }
3859         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3860         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3861       }
3862       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3863       if (compute_eigs) {
3864         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3865         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3866       }
3867     }
3868   }
3869   /* print additional info */
3870   if (pcbddc->dbg_flag) {
3871     /* waits until all processes reaches this point */
3872     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3873     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3874     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3875   }
3876 
3877   /* free memory */
3878   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3879   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3880   PetscFunctionReturn(0);
3881 }
3882 
3883 #undef __FUNCT__
3884 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3885 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3886 {
3887   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3888   PC_IS*         pcis = (PC_IS*)pc->data;
3889   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3890   PetscInt       i,coarse_size;
3891   PetscInt       *local_primal_indices;
3892   PetscErrorCode ierr;
3893 
3894   PetscFunctionBegin;
3895   /* Compute global number of coarse dofs */
3896   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3897     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3898   }
3899   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3900 
3901   /* check numbering */
3902   if (pcbddc->dbg_flag) {
3903     PetscScalar coarsesum,*array;
3904 
3905     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3906     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3907     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3908     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3909     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3910     for (i=0;i<pcbddc->local_primal_size;i++) {
3911       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3912     }
3913     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3914     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3915     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3916     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3917     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3918     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3919     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3920     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3921     for (i=0;i<pcis->n;i++) {
3922       if (array[i] == 1.0) {
3923         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3924       }
3925     }
3926     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3927     for (i=0;i<pcis->n;i++) {
3928       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3929     }
3930     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3931     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3932     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3933     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3934     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3935     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3936     if (pcbddc->dbg_flag > 1) {
3937       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3938       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3939       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3940       for (i=0;i<pcbddc->local_primal_size;i++) {
3941         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3942       }
3943       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3944     }
3945     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3946   }
3947   /* get back data */
3948   *coarse_size_n = coarse_size;
3949   *local_primal_indices_n = local_primal_indices;
3950   PetscFunctionReturn(0);
3951 }
3952 
3953 #undef __FUNCT__
3954 #define __FUNCT__ "PCBDDCGlobalToLocal"
3955 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3956 {
3957   PC_IS*         pcis = (PC_IS*)pc->data;
3958   IS             localis_t;
3959   PetscInt       i,lsize,*idxs;
3960   PetscScalar    *vals;
3961   PetscErrorCode ierr;
3962 
3963   PetscFunctionBegin;
3964   /* get dirichlet indices in local ordering exploiting local to global map */
3965   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3966   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3967   for (i=0;i<lsize;i++) vals[i] = 1.0;
3968   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3969   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3970   if (idxs) { /* multilevel guard */
3971     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3972   }
3973   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3974   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3975   ierr = PetscFree(vals);CHKERRQ(ierr);
3976   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3977   /* now compute dirichlet set in local ordering */
3978   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3979   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3980   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3981   for (i=0,lsize=0;i<pcis->n;i++) {
3982     if (vals[i] == 1.0) {
3983       lsize++;
3984     }
3985   }
3986   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3987   for (i=0,lsize=0;i<pcis->n;i++) {
3988     if (vals[i] == 1.0) {
3989       idxs[lsize++] = i;
3990     }
3991   }
3992   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3993   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3994   *localis = localis_t;
3995   PetscFunctionReturn(0);
3996 }
3997