1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCSetLevel" 47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 pcbddc->current_level=level; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCResetCustomization" 58 PetscErrorCode PCBDDCResetCustomization(PC pc) 59 { 60 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 61 PetscInt i; 62 PetscErrorCode ierr; 63 64 PetscFunctionBegin; 65 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 66 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 67 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 68 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 70 for (i=0;i<pcbddc->n_ISForDofs;i++) { 71 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "PCBDDCResetTopography" 79 PetscErrorCode PCBDDCResetTopography(PC pc) 80 { 81 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 86 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 87 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PCBDDCResetSolvers" 93 PetscErrorCode PCBDDCResetSolvers(PC pc) 94 { 95 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 101 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 103 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 104 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 105 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 106 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 107 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 108 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 110 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 113 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 115 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 116 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 117 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 118 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 119 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 120 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 121 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 122 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 #undef __FUNCT__ 127 #define __FUNCT__ "PCBDDCCreateWorkVectors" 128 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 129 { 130 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 131 PC_IS *pcis = (PC_IS*)pc->data; 132 VecType impVecType; 133 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 134 PetscErrorCode ierr; 135 136 PetscFunctionBegin; 137 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 138 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 139 local_primal_size = n_constraints+n_vertices; 140 n_R = pcis->n-n_vertices; 141 /* local work vectors */ 142 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 143 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 144 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 146 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 147 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 148 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 149 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 150 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 151 if (n_constraints) { 152 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 153 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 154 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 161 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 162 { 163 PetscErrorCode ierr; 164 /* pointers to pcis and pcbddc */ 165 PC_IS* pcis = (PC_IS*)pc->data; 166 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 167 /* submatrices of local problem */ 168 Mat A_RV,A_VR,A_VV; 169 /* working matrices */ 170 Mat M1,M2,M3,C_CR; 171 /* working vectors */ 172 Vec vec1_C,vec2_C,vec1_V,vec2_V; 173 /* additional working stuff */ 174 IS is_aux; 175 ISLocalToGlobalMapping BtoNmap; 176 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 177 const PetscScalar *array,*row_cmat_values; 178 const PetscInt *row_cmat_indices,*idx_R_local; 179 PetscInt *vertices,*idx_V_B,*auxindices; 180 PetscInt n_vertices,n_constraints,size_of_constraint; 181 PetscInt i,j,n_R,n_D,n_B; 182 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 183 /* Vector and matrix types */ 184 VecType impVecType; 185 MatType impMatType; 186 /* some shortcuts to scalars */ 187 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 188 /* for debugging purposes */ 189 PetscReal *coarsefunctions_errors,*constraints_errors; 190 191 PetscFunctionBegin; 192 /* get number of vertices and their local indices */ 193 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 194 n_constraints = pcbddc->local_primal_size-n_vertices; 195 /* Set Non-overlapping dimensions */ 196 n_B = pcis->n_B; n_D = pcis->n - n_B; 197 n_R = pcis->n-n_vertices; 198 199 /* Set types for local objects needed by BDDC precondtioner */ 200 impMatType = MATSEQDENSE; 201 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 202 203 /* Allocating some extra storage just to be safe */ 204 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 205 for (i=0;i<pcis->n;i++) auxindices[i]=i; 206 207 /* vertices in boundary numbering */ 208 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 209 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 210 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 211 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 212 if (i != n_vertices) { 213 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 214 } 215 216 /* some work vectors on vertices and/or constraints */ 217 if (n_vertices) { 218 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 219 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 220 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 221 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 222 } 223 if (n_constraints) { 224 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 225 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 226 } 227 228 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 229 if (n_constraints) { 230 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 231 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 232 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 233 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 234 235 /* Extract constraints on R nodes: C_{CR} */ 236 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 237 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 238 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 239 240 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 241 for (i=0;i<n_constraints;i++) { 242 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 243 /* Get row of constraint matrix in R numbering */ 244 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 245 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 246 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 247 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 248 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 249 /* Solve for row of constraint matrix in R numbering */ 250 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 251 /* Set values in local_auxmat2 */ 252 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 253 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 254 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 255 } 256 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 257 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 258 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 259 260 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 261 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 262 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 263 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 264 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 265 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 266 ierr = MatSetUp(M1);CHKERRQ(ierr); 267 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 268 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 269 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 270 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 271 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 272 ierr = MatDestroy(&M2);CHKERRQ(ierr); 273 ierr = MatDestroy(&M3);CHKERRQ(ierr); 274 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 275 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 276 } 277 278 /* Get submatrices from subdomain matrix */ 279 if (n_vertices) { 280 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 281 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 282 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 283 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 284 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 285 } 286 287 /* Matrix of coarse basis functions (local) */ 288 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 289 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 290 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 291 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 292 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 293 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 294 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 295 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 296 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 297 } 298 299 if (pcbddc->dbg_flag) { 300 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 301 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 303 } 304 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 305 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 306 307 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 308 309 /* vertices */ 310 for (i=0;i<n_vertices;i++) { 311 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 312 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 313 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 314 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 315 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 316 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 317 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 318 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 319 if (n_constraints) { 320 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 321 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 322 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 323 } 324 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 325 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 326 327 /* Set values in coarse basis function and subdomain part of coarse_mat */ 328 /* coarse basis functions */ 329 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 330 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 331 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 332 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 333 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 334 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 335 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 336 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 337 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 338 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 339 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 340 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 342 } 343 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 344 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 345 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 347 if (n_constraints) { 348 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 349 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 350 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 351 } 352 353 /* check */ 354 if (pcbddc->dbg_flag) { 355 /* assemble subdomain vector on local nodes */ 356 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 357 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 358 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 359 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 360 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 362 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 363 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 364 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 365 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 366 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 367 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 368 if (n_constraints) { 369 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 370 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 371 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 372 } 373 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 374 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 375 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 376 /* check saddle point solution */ 377 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 378 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 379 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 380 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 381 /* shift by the identity matrix */ 382 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 383 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 384 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 385 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 386 } 387 } 388 389 /* constraints */ 390 for (i=0;i<n_constraints;i++) { 391 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 392 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 393 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 394 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 395 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 396 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 397 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 398 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 399 if (n_vertices) { 400 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 401 } 402 /* Set values in coarse basis function and subdomain part of coarse_mat */ 403 /* coarse basis functions */ 404 j = i+n_vertices; /* don't touch this! */ 405 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 406 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 409 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 411 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 412 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 414 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 415 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 417 } 418 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 419 if (n_vertices) { 420 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 421 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 423 } 424 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 425 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 427 428 if (pcbddc->dbg_flag) { 429 /* assemble subdomain vector on nodes */ 430 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 431 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 432 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 434 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 436 /* assemble subdomain vector of lagrange multipliers */ 437 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 438 if (n_vertices) { 439 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 442 } 443 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 444 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 445 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 449 /* check saddle point solution */ 450 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 451 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 452 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 453 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 454 /* shift by the identity matrix */ 455 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 459 } 460 } 461 /* call assembling routines for local coarse basis */ 462 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 463 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 464 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 } 468 469 /* compute other basis functions for non-symmetric problems */ 470 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 471 if (!setsym || (setsym && !issym)) { 472 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 473 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 474 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 475 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 477 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 478 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 479 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 480 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 } 482 for (i=0;i<pcbddc->local_primal_size;i++) { 483 if (n_constraints) { 484 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 485 for (j=0;j<n_constraints;j++) { 486 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 487 } 488 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 489 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 490 } 491 if (i<n_vertices) { 492 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 493 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 494 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 495 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 496 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 497 if (n_constraints) { 498 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 499 } 500 } else { 501 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 504 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 505 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 507 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 508 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 510 if (i<n_vertices) { 511 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 512 } 513 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 514 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 515 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 516 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 517 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 518 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 519 } 520 521 if (pcbddc->dbg_flag) { 522 /* assemble subdomain vector on nodes */ 523 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 524 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 525 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 526 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 527 if (i<n_vertices) { 528 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 532 /* assemble subdomain vector of lagrange multipliers */ 533 for (j=0;j<pcbddc->local_primal_size;j++) { 534 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 535 } 536 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 537 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 538 /* check saddle point solution */ 539 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 540 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 541 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 542 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 543 /* shift by the identity matrix */ 544 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 545 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 546 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 547 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 548 } 549 } 550 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } 556 } 557 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 558 /* Checking coarse_sub_mat and coarse basis functios */ 559 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 560 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 561 if (pcbddc->dbg_flag) { 562 Mat coarse_sub_mat; 563 Mat AUXMAT,TM1,TM2,TM3,TM4; 564 Mat coarse_phi_D,coarse_phi_B; 565 Mat coarse_psi_D,coarse_psi_B; 566 Mat A_II,A_BB,A_IB,A_BI; 567 MatType checkmattype=MATSEQAIJ; 568 PetscReal real_value; 569 570 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 571 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 572 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 573 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 574 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 575 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 576 if (pcbddc->coarse_psi_B) { 577 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 578 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 579 } 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 581 582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 584 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 585 if (pcbddc->coarse_psi_B) { 586 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 590 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 591 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 592 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 593 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 594 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 595 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 596 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 597 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 598 } else { 599 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 601 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 } 608 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 609 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 610 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 611 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 612 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 613 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 618 for (i=0;i<pcbddc->local_primal_size;i++) { 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 620 } 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 622 for (i=0;i<pcbddc->local_primal_size;i++) { 623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 624 } 625 if (pcbddc->coarse_psi_B) { 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 627 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 629 } 630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 631 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 633 } 634 } 635 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 636 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 637 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 638 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 639 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 640 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 641 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 642 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 643 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 644 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 645 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 646 if (pcbddc->coarse_psi_B) { 647 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 648 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 649 } 650 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 651 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 652 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 653 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 654 } 655 /* free memory */ 656 if (n_vertices) { 657 ierr = PetscFree(vertices);CHKERRQ(ierr); 658 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 659 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 660 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 661 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 662 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 663 } 664 if (n_constraints) { 665 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 666 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 667 ierr = MatDestroy(&M1);CHKERRQ(ierr); 668 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 669 } 670 ierr = PetscFree(auxindices);CHKERRQ(ierr); 671 /* get back data */ 672 *coarse_submat_vals_n = coarse_submat_vals; 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 678 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 679 { 680 PC_IS* pcis = (PC_IS*)(pc->data); 681 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 682 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 683 /* manage repeated solves */ 684 MatReuse reuse; 685 MatStructure matstruct; 686 PetscErrorCode ierr; 687 688 PetscFunctionBegin; 689 /* get mat flags */ 690 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 691 reuse = MAT_INITIAL_MATRIX; 692 if (pc->setupcalled) { 693 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 694 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 695 if (matstruct == SAME_NONZERO_PATTERN) { 696 reuse = MAT_REUSE_MATRIX; 697 } else { 698 reuse = MAT_INITIAL_MATRIX; 699 } 700 } 701 if (reuse == MAT_INITIAL_MATRIX) { 702 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 705 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 706 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 707 } 708 709 /* transform local matrices if needed */ 710 if (pcbddc->use_change_of_basis) { 711 Mat change_mat_all; 712 PetscScalar *row_cmat_values; 713 PetscInt *row_cmat_indices; 714 PetscInt *nnz,*is_indices,*temp_indices; 715 PetscInt i,j,k,n_D,n_B; 716 717 /* Get Non-overlapping dimensions */ 718 n_B = pcis->n_B; 719 n_D = pcis->n-n_B; 720 721 /* compute nonzero structure of change of basis on all local nodes */ 722 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 723 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 724 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 725 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 727 k=1; 728 for (i=0;i<n_B;i++) { 729 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 730 nnz[is_indices[i]]=j; 731 if (k < j) k = j; 732 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 733 } 734 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 735 /* assemble change of basis matrix on the whole set of local dofs */ 736 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 737 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 738 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 739 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 740 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 741 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 742 for (i=0;i<n_D;i++) { 743 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 744 } 745 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 746 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 747 for (i=0;i<n_B;i++) { 748 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 749 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 750 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 752 } 753 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 754 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 755 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 756 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 757 if (i==1) { 758 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 759 } else { 760 Mat work_mat; 761 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 762 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 763 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 764 } 765 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 766 ierr = PetscFree(nnz);CHKERRQ(ierr); 767 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 768 } else { 769 /* without change of basis, the local matrix is unchanged */ 770 if (!pcbddc->local_mat) { 771 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 772 pcbddc->local_mat = matis->A; 773 } 774 } 775 776 /* get submatrices */ 777 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 778 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 779 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 780 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 786 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 787 { 788 PC_IS* pcis = (PC_IS*)(pc->data); 789 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 790 IS is_aux1,is_aux2; 791 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 792 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 793 PetscBT bitmask; 794 PetscErrorCode ierr; 795 796 PetscFunctionBegin; 797 /* Set Non-overlapping dimensions */ 798 n_B = pcis->n_B; n_D = pcis->n - n_B; 799 /* get vertex indices from constraint matrix */ 800 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 801 /* Set number of constraints */ 802 n_constraints = pcbddc->local_primal_size-n_vertices; 803 /* create auxiliary bitmask */ 804 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 805 for (i=0;i<n_vertices;i++) { 806 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 807 } 808 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 809 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 810 for (i=0, n_R=0; i<pcis->n; i++) { 811 if (!PetscBTLookup(bitmask,i)) { 812 idx_R_local[n_R] = i; 813 n_R++; 814 } 815 } 816 ierr = PetscFree(vertices);CHKERRQ(ierr); 817 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 818 819 /* print some info if requested */ 820 if (pcbddc->dbg_flag) { 821 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 822 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 825 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 826 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 827 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 828 } 829 830 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 831 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 832 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 833 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 834 for (i=0; i<n_D; i++) { 835 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 836 } 837 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 838 for (i=0, j=0; i<n_R; i++) { 839 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 840 aux_array1[j++] = i; 841 } 842 } 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 844 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 845 for (i=0, j=0; i<n_B; i++) { 846 if (!PetscBTLookup(bitmask,is_indices[i])) { 847 aux_array2[j++] = i; 848 } 849 } 850 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 851 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 852 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 853 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 854 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 855 856 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 857 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 858 for (i=0, j=0; i<n_R; i++) { 859 if (PetscBTLookup(bitmask,idx_R_local[i])) { 860 aux_array1[j++] = i; 861 } 862 } 863 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 864 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 865 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 866 } 867 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 873 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 874 { 875 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 876 877 PetscFunctionBegin; 878 pcbddc->use_exact_dirichlet=use; 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 884 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 885 { 886 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 887 PC_IS *pcis = (PC_IS*)pc->data; 888 PC pc_temp; 889 Mat A_RR; 890 Vec vec1,vec2,vec3; 891 MatStructure matstruct; 892 PetscScalar m_one = -1.0; 893 PetscReal value; 894 PetscInt n_D,n_R,use_exact,use_exact_reduced; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 /* Creating PC contexts for local Dirichlet and Neumann problems */ 899 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 900 901 /* DIRICHLET PROBLEM */ 902 /* Matrix for Dirichlet problem is pcis->A_II */ 903 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 904 if (!pcbddc->ksp_D) { /* create object if not yet build */ 905 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 906 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 907 /* default */ 908 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 909 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 910 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 911 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 912 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 913 } 914 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 915 /* Allow user's customization */ 916 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 917 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 918 if (!n_D) { 919 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 920 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 921 } 922 /* Set Up KSP for Dirichlet problem of BDDC */ 923 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 924 /* set ksp_D into pcis data */ 925 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 926 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 927 pcis->ksp_D = pcbddc->ksp_D; 928 929 /* NEUMANN PROBLEM */ 930 /* Matrix for Neumann problem is A_RR -> we need to create it */ 931 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 932 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 933 if (!pcbddc->ksp_R) { /* create object if not yet build */ 934 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 935 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 936 /* default */ 937 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 938 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 939 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 940 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 941 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 942 } 943 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 944 /* Allow user's customization */ 945 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 946 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 947 if (!n_R) { 948 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 949 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 950 } 951 /* Set Up KSP for Neumann problem of BDDC */ 952 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 953 954 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 955 956 /* Dirichlet */ 957 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 958 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 959 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 960 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 961 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 962 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 963 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 964 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 965 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 966 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 967 /* need to be adapted? */ 968 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 969 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 970 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 971 /* print info */ 972 if (pcbddc->dbg_flag) { 973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 974 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 978 } 979 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) { 980 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 981 } 982 983 /* Neumann */ 984 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 985 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 986 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 987 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 988 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 989 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 990 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 991 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 992 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 993 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 994 /* need to be adapted? */ 995 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 996 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 997 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 998 /* print info */ 999 if (pcbddc->dbg_flag) { 1000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1002 } 1003 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1005 } 1006 1007 /* free Neumann problem's matrix */ 1008 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1014 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1015 { 1016 PetscErrorCode ierr; 1017 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1018 1019 PetscFunctionBegin; 1020 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1021 if (pcbddc->local_auxmat1) { 1022 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1023 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1024 } 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1030 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1031 { 1032 PetscErrorCode ierr; 1033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1034 PC_IS* pcis = (PC_IS*) (pc->data); 1035 const PetscScalar zero = 0.0; 1036 1037 PetscFunctionBegin; 1038 /* Application of PHI^T (or PSI^T) */ 1039 if (pcbddc->coarse_psi_B) { 1040 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1041 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1042 } else { 1043 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1044 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1045 } 1046 /* Scatter data of coarse_rhs */ 1047 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1048 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049 1050 /* Local solution on R nodes */ 1051 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1052 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1053 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1054 if (pcbddc->inexact_prec_type) { 1055 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1056 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1057 } 1058 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1059 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1060 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 if (pcbddc->inexact_prec_type) { 1063 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1064 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1065 } 1066 1067 /* Coarse solution */ 1068 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1070 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1071 } 1072 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1074 1075 /* Sum contributions from two levels */ 1076 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1077 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1083 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1084 { 1085 PetscErrorCode ierr; 1086 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1087 1088 PetscFunctionBegin; 1089 switch (pcbddc->coarse_communications_type) { 1090 case SCATTERS_BDDC: 1091 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1092 break; 1093 case GATHERS_BDDC: 1094 break; 1095 } 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1101 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1102 { 1103 PetscErrorCode ierr; 1104 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1105 PetscScalar* array_to; 1106 PetscScalar* array_from; 1107 MPI_Comm comm; 1108 PetscInt i; 1109 1110 PetscFunctionBegin; 1111 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1112 switch (pcbddc->coarse_communications_type) { 1113 case SCATTERS_BDDC: 1114 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1115 break; 1116 case GATHERS_BDDC: 1117 if (vec_from) { 1118 ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr); 1119 } 1120 if (vec_to) { 1121 ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr); 1122 } 1123 switch(pcbddc->coarse_problem_type){ 1124 case SEQUENTIAL_BDDC: 1125 if (smode == SCATTER_FORWARD) { 1126 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1127 if (vec_to) { 1128 if (imode == ADD_VALUES) { 1129 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1130 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1131 } 1132 } else { 1133 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1134 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1135 } 1136 } 1137 } 1138 } else { 1139 if (vec_from) { 1140 if (imode == ADD_VALUES) { 1141 MPI_Comm vec_from_comm; 1142 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr); 1143 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type); 1144 } 1145 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1146 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1147 } 1148 } 1149 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1150 } 1151 break; 1152 case REPLICATED_BDDC: 1153 if (smode == SCATTER_FORWARD) { 1154 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1155 if (imode == ADD_VALUES) { 1156 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1157 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1158 } 1159 } else { 1160 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1161 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1162 } 1163 } 1164 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1165 if (imode == ADD_VALUES) { 1166 for (i=0;i<pcbddc->local_primal_size;i++) { 1167 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 1168 } 1169 } else { 1170 for (i=0;i<pcbddc->local_primal_size;i++) { 1171 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1172 } 1173 } 1174 } 1175 break; 1176 case MULTILEVEL_BDDC: 1177 break; 1178 case PARALLEL_BDDC: 1179 break; 1180 } 1181 if (vec_from) { 1182 ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr); 1183 } 1184 if (vec_to) { 1185 ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr); 1186 } 1187 break; 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 /* uncomment for testing purposes */ 1193 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1196 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1197 { 1198 PetscErrorCode ierr; 1199 PC_IS* pcis = (PC_IS*)(pc->data); 1200 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1201 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1202 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1203 MatType impMatType=MATSEQAIJ; 1204 /* one and zero */ 1205 PetscScalar one=1.0,zero=0.0; 1206 /* space to store constraints and their local indices */ 1207 PetscScalar *temp_quadrature_constraint; 1208 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1209 /* iterators */ 1210 PetscInt i,j,k,total_counts,temp_start_ptr; 1211 /* stuff to store connected components stored in pcbddc->mat_graph */ 1212 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1213 PetscInt n_ISForFaces,n_ISForEdges; 1214 PetscBool get_faces,get_edges,get_vertices; 1215 /* near null space stuff */ 1216 MatNullSpace nearnullsp; 1217 const Vec *nearnullvecs; 1218 Vec *localnearnullsp; 1219 PetscBool nnsp_has_cnst; 1220 PetscInt nnsp_size; 1221 PetscScalar *array; 1222 /* BLAS integers */ 1223 PetscBLASInt lwork,lierr; 1224 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1225 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1226 /* LAPACK working arrays for SVD or POD */ 1227 PetscBool skip_lapack; 1228 PetscScalar *work; 1229 PetscReal *singular_vals; 1230 #if defined(PETSC_USE_COMPLEX) 1231 PetscReal *rwork; 1232 #endif 1233 #if defined(PETSC_MISSING_LAPACK_GESVD) 1234 PetscBLASInt Blas_one_2=1; 1235 PetscScalar *temp_basis,*correlation_mat; 1236 #else 1237 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1238 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1239 #endif 1240 /* change of basis */ 1241 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1242 PetscBool boolforchange; 1243 PetscBT touched,change_basis; 1244 /* auxiliary stuff */ 1245 PetscInt *nnz,*is_indices,*local_to_B; 1246 /* some quantities */ 1247 PetscInt n_vertices,total_primal_vertices; 1248 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1249 1250 1251 PetscFunctionBegin; 1252 /* Get index sets for faces, edges and vertices from graph */ 1253 get_faces = PETSC_TRUE; 1254 get_edges = PETSC_TRUE; 1255 get_vertices = PETSC_TRUE; 1256 if (pcbddc->vertices_flag) { 1257 get_faces = PETSC_FALSE; 1258 get_edges = PETSC_FALSE; 1259 } 1260 if (pcbddc->constraints_flag) { 1261 get_vertices = PETSC_FALSE; 1262 } 1263 if (pcbddc->faces_flag) { 1264 get_edges = PETSC_FALSE; 1265 } 1266 if (pcbddc->edges_flag) { 1267 get_faces = PETSC_FALSE; 1268 } 1269 /* default */ 1270 if (!get_faces && !get_edges && !get_vertices) { 1271 get_vertices = PETSC_TRUE; 1272 } 1273 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1274 /* print some info */ 1275 if (pcbddc->dbg_flag) { 1276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1277 i = 0; 1278 if (ISForVertices) { 1279 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1280 } 1281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1283 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 } 1286 /* check if near null space is attached to global mat */ 1287 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1288 if (nearnullsp) { 1289 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1290 } else { /* if near null space is not provided BDDC uses constants by default */ 1291 nnsp_size = 0; 1292 nnsp_has_cnst = PETSC_TRUE; 1293 } 1294 /* get max number of constraints on a single cc */ 1295 max_constraints = nnsp_size; 1296 if (nnsp_has_cnst) max_constraints++; 1297 1298 /* 1299 Evaluate maximum storage size needed by the procedure 1300 - temp_indices will contain start index of each constraint stored as follows 1301 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1302 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1303 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1304 */ 1305 total_counts = n_ISForFaces+n_ISForEdges; 1306 total_counts *= max_constraints; 1307 n_vertices = 0; 1308 if (ISForVertices) { 1309 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1310 } 1311 total_counts += n_vertices; 1312 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1313 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1314 total_counts = 0; 1315 max_size_of_constraint = 0; 1316 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1317 if (i<n_ISForEdges) { 1318 used_IS = &ISForEdges[i]; 1319 } else { 1320 used_IS = &ISForFaces[i-n_ISForEdges]; 1321 } 1322 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1323 total_counts += j; 1324 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1325 } 1326 total_counts *= max_constraints; 1327 total_counts += n_vertices; 1328 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1329 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1330 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1331 /* local to boundary numbering */ 1332 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1333 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1334 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1335 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1336 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1337 /* get local part of global near null space vectors */ 1338 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1339 for (k=0;k<nnsp_size;k++) { 1340 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1341 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1342 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1343 } 1344 1345 /* whether or not to skip lapack calls */ 1346 skip_lapack = PETSC_TRUE; 1347 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1348 1349 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1350 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1351 PetscScalar temp_work; 1352 #if defined(PETSC_MISSING_LAPACK_GESVD) 1353 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1354 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1355 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1356 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1357 #if defined(PETSC_USE_COMPLEX) 1358 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1359 #endif 1360 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1361 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1362 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1363 lwork = -1; 1364 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1365 #if !defined(PETSC_USE_COMPLEX) 1366 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1367 #else 1368 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1369 #endif 1370 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1371 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1372 #else /* on missing GESVD */ 1373 /* SVD */ 1374 PetscInt max_n,min_n; 1375 max_n = max_size_of_constraint; 1376 min_n = max_constraints; 1377 if (max_size_of_constraint < max_constraints) { 1378 min_n = max_size_of_constraint; 1379 max_n = max_constraints; 1380 } 1381 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1382 #if defined(PETSC_USE_COMPLEX) 1383 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1384 #endif 1385 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1386 lwork = -1; 1387 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1388 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1389 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1390 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1391 #if !defined(PETSC_USE_COMPLEX) 1392 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1393 #else 1394 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1395 #endif 1396 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1397 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1398 #endif /* on missing GESVD */ 1399 /* Allocate optimal workspace */ 1400 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1401 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1402 } 1403 /* Now we can loop on constraining sets */ 1404 total_counts = 0; 1405 temp_indices[0] = 0; 1406 /* vertices */ 1407 if (ISForVertices) { 1408 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1409 if (nnsp_has_cnst) { /* consider all vertices */ 1410 for (i=0;i<n_vertices;i++) { 1411 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1412 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1413 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1414 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1415 total_counts++; 1416 } 1417 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1418 PetscBool used_vertex; 1419 for (i=0;i<n_vertices;i++) { 1420 used_vertex = PETSC_FALSE; 1421 k = 0; 1422 while (!used_vertex && k<nnsp_size) { 1423 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1424 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1425 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1426 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1427 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1428 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1429 total_counts++; 1430 used_vertex = PETSC_TRUE; 1431 } 1432 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1433 k++; 1434 } 1435 } 1436 } 1437 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 n_vertices = total_counts; 1439 } 1440 1441 /* edges and faces */ 1442 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1443 if (i<n_ISForEdges) { 1444 used_IS = &ISForEdges[i]; 1445 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1446 } else { 1447 used_IS = &ISForFaces[i-n_ISForEdges]; 1448 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1449 } 1450 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1451 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1452 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1453 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1454 /* change of basis should not be performed on local periodic nodes */ 1455 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1456 if (nnsp_has_cnst) { 1457 PetscScalar quad_value; 1458 temp_constraints++; 1459 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1460 for (j=0;j<size_of_constraint;j++) { 1461 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1462 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1463 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1464 } 1465 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1466 if (boolforchange) { 1467 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1468 } 1469 total_counts++; 1470 } 1471 for (k=0;k<nnsp_size;k++) { 1472 PetscReal real_value; 1473 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1474 for (j=0;j<size_of_constraint;j++) { 1475 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1476 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1477 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1478 } 1479 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1480 /* check if array is null on the connected component */ 1481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1482 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1483 if (real_value > 0.0) { /* keep indices and values */ 1484 temp_constraints++; 1485 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1486 if (boolforchange) { 1487 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1488 } 1489 total_counts++; 1490 } 1491 } 1492 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1493 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1494 if (!pcbddc->use_nnsp_true) { 1495 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1496 1497 #if defined(PETSC_MISSING_LAPACK_GESVD) 1498 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1499 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1500 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1501 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1502 from that computed using LAPACKgesvd 1503 -> This is due to a different computation of eigenvectors in LAPACKheev 1504 -> The quality of the POD-computed basis will be the same */ 1505 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1506 /* Store upper triangular part of correlation matrix */ 1507 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1508 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1509 for (j=0;j<temp_constraints;j++) { 1510 for (k=0;k<j+1;k++) { 1511 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1512 } 1513 } 1514 /* compute eigenvalues and eigenvectors of correlation matrix */ 1515 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1516 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1517 #if !defined(PETSC_USE_COMPLEX) 1518 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1519 #else 1520 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1521 #endif 1522 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1523 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1524 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1525 j=0; 1526 while (j < temp_constraints && singular_vals[j] < tol) j++; 1527 total_counts=total_counts-j; 1528 /* scale and copy POD basis into used quadrature memory */ 1529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1530 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1531 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1533 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1535 if (j<temp_constraints) { 1536 PetscInt ii; 1537 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1538 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1539 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1540 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1541 for (k=0;k<temp_constraints-j;k++) { 1542 for (ii=0;ii<size_of_constraint;ii++) { 1543 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1544 } 1545 } 1546 } 1547 #else /* on missing GESVD */ 1548 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1549 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1552 #if !defined(PETSC_USE_COMPLEX) 1553 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1554 #else 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1556 #endif 1557 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1559 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1560 k = temp_constraints; 1561 if (k > size_of_constraint) k = size_of_constraint; 1562 j = 0; 1563 while (j < k && singular_vals[k-j-1] < tol) j++; 1564 total_counts = total_counts-temp_constraints+k-j; 1565 #endif /* on missing GESVD */ 1566 } 1567 } 1568 /* free index sets of faces, edges and vertices */ 1569 for (i=0;i<n_ISForFaces;i++) { 1570 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1571 } 1572 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1573 for (i=0;i<n_ISForEdges;i++) { 1574 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1575 } 1576 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1577 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1578 1579 /* free workspace */ 1580 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1581 ierr = PetscFree(work);CHKERRQ(ierr); 1582 #if defined(PETSC_USE_COMPLEX) 1583 ierr = PetscFree(rwork);CHKERRQ(ierr); 1584 #endif 1585 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1586 #if defined(PETSC_MISSING_LAPACK_GESVD) 1587 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1588 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1589 #endif 1590 } 1591 for (k=0;k<nnsp_size;k++) { 1592 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1593 } 1594 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1595 1596 /* set quantities in pcbddc data structure */ 1597 /* n_vertices defines the number of subdomain corners in the primal space */ 1598 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1599 pcbddc->local_primal_size = total_counts; 1600 pcbddc->n_vertices = n_vertices; 1601 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1602 1603 /* Create constraint matrix */ 1604 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1605 /* so we need to set it up properly either with or without change of basis */ 1606 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1607 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1608 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1609 /* array to compute a local numbering of constraints : vertices first then constraints */ 1610 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1611 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1612 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1613 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1614 /* auxiliary stuff for basis change */ 1615 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1616 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1617 1618 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1619 total_primal_vertices=0; 1620 for (i=0;i<pcbddc->local_primal_size;i++) { 1621 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1622 if (size_of_constraint == 1) { 1623 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1624 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1625 aux_primal_minloc[total_primal_vertices]=0; 1626 total_primal_vertices++; 1627 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1628 PetscInt min_loc,min_index; 1629 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1630 /* find first untouched local node */ 1631 k = 0; 1632 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1633 min_index = global_indices[k]; 1634 min_loc = k; 1635 /* search the minimum among global nodes already untouched on the cc */ 1636 for (k=1;k<size_of_constraint;k++) { 1637 /* there can be more than one constraint on a single connected component */ 1638 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1639 min_index = global_indices[k]; 1640 min_loc = k; 1641 } 1642 } 1643 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1644 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1645 aux_primal_minloc[total_primal_vertices]=min_loc; 1646 total_primal_vertices++; 1647 } 1648 } 1649 /* free workspace */ 1650 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1651 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1652 /* permute indices in order to have a sorted set of vertices */ 1653 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1654 1655 /* nonzero structure of constraint matrix */ 1656 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1657 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1658 j=total_primal_vertices; 1659 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1660 if (!PetscBTLookup(change_basis,i)) { 1661 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1662 j++; 1663 } 1664 } 1665 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1666 ierr = PetscFree(nnz);CHKERRQ(ierr); 1667 /* set values in constraint matrix */ 1668 for (i=0;i<total_primal_vertices;i++) { 1669 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1670 } 1671 total_counts = total_primal_vertices; 1672 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1673 if (!PetscBTLookup(change_basis,i)) { 1674 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1675 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1676 total_counts++; 1677 } 1678 } 1679 /* assembling */ 1680 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 /* 1683 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1684 */ 1685 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1686 if (pcbddc->use_change_of_basis) { 1687 PetscBool qr_needed = PETSC_FALSE; 1688 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1689 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1690 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1691 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1692 /* work arrays */ 1693 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1694 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1695 /* nonzeros per row */ 1696 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1697 if (PetscBTLookup(change_basis,i)) { 1698 qr_needed = PETSC_TRUE; 1699 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1700 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1701 } 1702 } 1703 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1704 ierr = PetscFree(nnz);CHKERRQ(ierr); 1705 /* Set initial identity in the matrix */ 1706 for (i=0;i<pcis->n_B;i++) { 1707 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1708 } 1709 1710 /* Now we loop on the constraints which need a change of basis */ 1711 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1712 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1713 /* Change of basis matrix T computed via QR decomposition of constraints */ 1714 if (qr_needed) { 1715 /* dual and primal dofs on a single cc */ 1716 PetscInt dual_dofs,primal_dofs; 1717 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1718 PetscInt primal_counter; 1719 /* working stuff for GEQRF */ 1720 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1721 PetscBLASInt lqr_work; 1722 /* working stuff for UNGQR */ 1723 PetscScalar *gqr_work,lgqr_work_t; 1724 PetscBLASInt lgqr_work; 1725 /* working stuff for TRTRS */ 1726 PetscScalar *trs_rhs; 1727 PetscBLASInt Blas_NRHS; 1728 /* pointers for values insertion into change of basis matrix */ 1729 PetscInt *start_rows,*start_cols; 1730 PetscScalar *start_vals; 1731 /* working stuff for values insertion */ 1732 PetscBT is_primal; 1733 1734 /* space to store Q */ 1735 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1736 /* first we issue queries for optimal work */ 1737 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1738 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1739 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1740 lqr_work = -1; 1741 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1742 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1743 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1744 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1745 lgqr_work = -1; 1746 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1747 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1748 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1749 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1750 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1751 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1752 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1753 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1754 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1755 /* array to store scaling factors for reflectors */ 1756 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1757 /* array to store rhs and solution of triangular solver */ 1758 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1759 /* array to store whether a node is primal or not */ 1760 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1761 for (i=0;i<total_primal_vertices;i++) { 1762 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1763 } 1764 1765 /* allocating workspace for check */ 1766 if (pcbddc->dbg_flag) { 1767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1768 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1769 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1770 } 1771 1772 /* loop on constraints and see whether or not they need a change of basis */ 1773 /* -> using implicit ordering contained in temp_indices data */ 1774 total_counts = pcbddc->n_vertices; 1775 primal_counter = total_counts; 1776 while (total_counts<pcbddc->local_primal_size) { 1777 primal_dofs = 1; 1778 if (PetscBTLookup(change_basis,total_counts)) { 1779 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1780 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1781 primal_dofs++; 1782 } 1783 /* get constraint info */ 1784 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1785 dual_dofs = size_of_constraint-primal_dofs; 1786 1787 /* copy quadrature constraints for change of basis check */ 1788 if (pcbddc->dbg_flag) { 1789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1790 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1791 } 1792 1793 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1794 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1795 1796 /* compute QR decomposition of constraints */ 1797 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1798 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1799 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1800 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1801 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1802 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1803 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1804 1805 /* explictly compute R^-T */ 1806 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1807 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1808 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1809 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1811 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1812 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1813 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1814 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1815 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1816 1817 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1818 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1819 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1820 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1821 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1822 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1823 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1824 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1825 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1826 1827 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1828 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1829 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1831 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1832 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1833 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1834 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1835 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1836 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1837 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1838 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1839 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1840 1841 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1842 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1843 /* insert cols for primal dofs */ 1844 for (j=0;j<primal_dofs;j++) { 1845 start_vals = &qr_basis[j*size_of_constraint]; 1846 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1847 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1848 } 1849 /* insert cols for dual dofs */ 1850 for (j=0,k=0;j<dual_dofs;k++) { 1851 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1852 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1853 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1854 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1855 j++; 1856 } 1857 } 1858 1859 /* check change of basis */ 1860 if (pcbddc->dbg_flag) { 1861 PetscInt ii,jj; 1862 PetscBool valid_qr=PETSC_TRUE; 1863 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1864 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1869 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1870 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1871 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1872 for (jj=0;jj<size_of_constraint;jj++) { 1873 for (ii=0;ii<primal_dofs;ii++) { 1874 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1875 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1876 } 1877 } 1878 if (!valid_qr) { 1879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1880 for (jj=0;jj<size_of_constraint;jj++) { 1881 for (ii=0;ii<primal_dofs;ii++) { 1882 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1883 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1884 } 1885 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1886 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1887 } 1888 } 1889 } 1890 } else { 1891 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1892 } 1893 } 1894 /* increment primal counter */ 1895 primal_counter += primal_dofs; 1896 } else { 1897 if (pcbddc->dbg_flag) { 1898 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1899 } 1900 } 1901 /* increment constraint counter total_counts */ 1902 total_counts += primal_dofs; 1903 } 1904 if (pcbddc->dbg_flag) { 1905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1906 ierr = PetscFree(work);CHKERRQ(ierr); 1907 } 1908 /* free workspace */ 1909 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1910 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1911 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1912 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1913 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1914 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1915 } 1916 /* assembling */ 1917 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1918 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1919 /* 1920 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1921 */ 1922 } 1923 /* free workspace */ 1924 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1925 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1926 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1927 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1928 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1929 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1930 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1931 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1937 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1938 { 1939 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1940 PC_IS *pcis = (PC_IS*)pc->data; 1941 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1942 PetscInt bs,ierr,i,vertex_size; 1943 PetscViewer viewer=pcbddc->dbg_viewer; 1944 1945 PetscFunctionBegin; 1946 /* Init local Graph struct */ 1947 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1948 1949 /* Check validity of the csr graph passed in by the user */ 1950 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1951 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1952 } 1953 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1954 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1955 Mat mat_adj; 1956 const PetscInt *xadj,*adjncy; 1957 PetscBool flg_row=PETSC_TRUE; 1958 1959 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1960 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1961 if (!flg_row) { 1962 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1963 } 1964 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1965 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1966 if (!flg_row) { 1967 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1968 } 1969 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1970 } 1971 1972 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1973 vertex_size = 1; 1974 if (!pcbddc->n_ISForDofs) { 1975 IS *custom_ISForDofs; 1976 1977 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1978 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1979 for (i=0;i<bs;i++) { 1980 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1981 } 1982 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1983 /* remove my references to IS objects */ 1984 for (i=0;i<bs;i++) { 1985 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1986 } 1987 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1988 } else { /* mat block size as vertex size (used for elasticity) */ 1989 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1990 } 1991 1992 /* Setup of Graph */ 1993 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1994 1995 /* Graph's connected components analysis */ 1996 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1997 1998 /* print some info to stdout */ 1999 if (pcbddc->dbg_flag) { 2000 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2001 } 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2007 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[]) 2008 { 2009 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2010 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2011 PetscErrorCode ierr; 2012 2013 PetscFunctionBegin; 2014 n = 0; 2015 vertices = 0; 2016 if (pcbddc->ConstraintMatrix) { 2017 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2018 for (i=0;i<local_primal_size;i++) { 2019 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2020 if (size_of_constraint == 1) n++; 2021 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2022 } 2023 if (vertices_idx) { 2024 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2025 n = 0; 2026 for (i=0;i<local_primal_size;i++) { 2027 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2028 if (size_of_constraint == 1) { 2029 vertices[n++]=row_cmat_indices[0]; 2030 } 2031 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2032 } 2033 } 2034 } 2035 *n_vertices = n; 2036 if (vertices_idx) *vertices_idx = vertices; 2037 PetscFunctionReturn(0); 2038 } 2039 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2042 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[]) 2043 { 2044 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2045 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2046 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2047 PetscBT touched; 2048 PetscErrorCode ierr; 2049 2050 PetscFunctionBegin; 2051 n = 0; 2052 constraints_index = 0; 2053 if (pcbddc->ConstraintMatrix) { 2054 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2055 max_size_of_constraint = 0; 2056 for (i=0;i<local_primal_size;i++) { 2057 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2058 if (size_of_constraint > 1) { 2059 n++; 2060 } 2061 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2062 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2063 } 2064 if (constraints_idx) { 2065 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2066 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2067 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2068 n = 0; 2069 for (i=0;i<local_primal_size;i++) { 2070 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2071 if (size_of_constraint > 1) { 2072 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2073 /* find first untouched local node */ 2074 j = 0; 2075 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2076 min_index = row_cmat_global_indices[j]; 2077 min_loc = j; 2078 /* search the minimum among nodes not yet touched on the connected component 2079 since there can be more than one constraint on a single cc */ 2080 for (j=1;j<size_of_constraint;j++) { 2081 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2082 min_index = row_cmat_global_indices[j]; 2083 min_loc = j; 2084 } 2085 } 2086 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2087 constraints_index[n++] = row_cmat_indices[min_loc]; 2088 } 2089 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2090 } 2091 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2092 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2093 } 2094 } 2095 *n_constraints = n; 2096 if (constraints_idx) *constraints_idx = constraints_index; 2097 PetscFunctionReturn(0); 2098 } 2099 2100 /* the next two functions has been adapted from pcis.c */ 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "PCBDDCApplySchur" 2103 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2104 { 2105 PetscErrorCode ierr; 2106 PC_IS *pcis = (PC_IS*)(pc->data); 2107 2108 PetscFunctionBegin; 2109 if (!vec2_B) { vec2_B = v; } 2110 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2111 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2112 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2113 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2114 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2120 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2121 { 2122 PetscErrorCode ierr; 2123 PC_IS *pcis = (PC_IS*)(pc->data); 2124 2125 PetscFunctionBegin; 2126 if (!vec2_B) { vec2_B = v; } 2127 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2128 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2129 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2130 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2131 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2132 PetscFunctionReturn(0); 2133 } 2134 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "PCBDDCSubsetNumbering" 2137 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2138 { 2139 Vec local_vec,global_vec; 2140 IS seqis,paris; 2141 VecScatter scatter_ctx; 2142 PetscScalar *array; 2143 PetscInt *temp_global_dofs; 2144 PetscScalar globalsum; 2145 PetscInt i,j,s; 2146 PetscInt nlocals,first_index,old_index,max_local; 2147 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2148 PetscMPIInt *dof_sizes,*dof_displs; 2149 PetscBool first_found; 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 /* mpi buffers */ 2154 MPI_Comm_size(comm,&size_prec_comm); 2155 MPI_Comm_rank(comm,&rank_prec_comm); 2156 j = ( !rank_prec_comm ? size_prec_comm : 0); 2157 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2158 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2159 /* get maximum size of subset */ 2160 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2161 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2162 max_local = 0; 2163 if (n_local_dofs) { 2164 max_local = temp_global_dofs[0]; 2165 for (i=1;i<n_local_dofs;i++) { 2166 if (max_local < temp_global_dofs[i] ) { 2167 max_local = temp_global_dofs[i]; 2168 } 2169 } 2170 } 2171 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2172 max_global++; 2173 max_local = 0; 2174 if (n_local_dofs) { 2175 max_local = local_dofs[0]; 2176 for (i=1;i<n_local_dofs;i++) { 2177 if (max_local < local_dofs[i] ) { 2178 max_local = local_dofs[i]; 2179 } 2180 } 2181 } 2182 max_local++; 2183 /* allocate workspace */ 2184 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2185 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2186 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2187 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2188 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2189 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2190 /* create scatter */ 2191 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2192 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2193 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2194 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2195 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2196 /* init array */ 2197 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2198 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2199 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2200 if (local_dofs_mult) { 2201 for (i=0;i<n_local_dofs;i++) { 2202 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2203 } 2204 } else { 2205 for (i=0;i<n_local_dofs;i++) { 2206 array[local_dofs[i]]=1.0; 2207 } 2208 } 2209 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2210 /* scatter into global vec and get total number of global dofs */ 2211 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2212 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2213 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2214 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2215 /* Fill global_vec with cumulative function for global numbering */ 2216 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2217 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2218 nlocals = 0; 2219 first_index = -1; 2220 first_found = PETSC_FALSE; 2221 for (i=0;i<s;i++) { 2222 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2223 first_found = PETSC_TRUE; 2224 first_index = i; 2225 } 2226 nlocals += (PetscInt)PetscRealPart(array[i]); 2227 } 2228 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2229 if (!rank_prec_comm) { 2230 dof_displs[0]=0; 2231 for (i=1;i<size_prec_comm;i++) { 2232 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2233 } 2234 } 2235 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2236 if (first_found) { 2237 array[first_index] += (PetscScalar)nlocals; 2238 old_index = first_index; 2239 for (i=first_index+1;i<s;i++) { 2240 if (PetscRealPart(array[i]) > 0.0) { 2241 array[i] += array[old_index]; 2242 old_index = i; 2243 } 2244 } 2245 } 2246 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2247 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2248 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2249 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2250 /* get global ordering of local dofs */ 2251 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2252 if (local_dofs_mult) { 2253 for (i=0;i<n_local_dofs;i++) { 2254 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2255 } 2256 } else { 2257 for (i=0;i<n_local_dofs;i++) { 2258 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2259 } 2260 } 2261 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2262 /* free workspace */ 2263 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2264 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2265 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2266 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2267 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2268 /* return pointer to global ordering of local dofs */ 2269 *global_numbering_subset = temp_global_dofs; 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2275 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2276 { 2277 PetscInt i,j; 2278 PetscScalar *alphas; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 /* this implements stabilized Gram-Schmidt */ 2283 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2284 for (i=0;i<n;i++) { 2285 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2286 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2287 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2288 } 2289 ierr = PetscFree(alphas);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 /* BDDC requires metis 5.0.1 for multilevel */ 2294 #if defined(PETSC_HAVE_METIS) 2295 #include "metis.h" 2296 #define MetisInt idx_t 2297 #define MetisScalar real_t 2298 #endif 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2302 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2303 { 2304 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2305 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2306 PC_IS *pcis = (PC_IS*)pc->data; 2307 MPI_Comm prec_comm; 2308 MPI_Comm coarse_comm; 2309 2310 PetscInt coarse_size; 2311 2312 MatNullSpace CoarseNullSpace; 2313 2314 /* common to all choiches */ 2315 PetscScalar *temp_coarse_mat_vals; 2316 PetscScalar *ins_coarse_mat_vals; 2317 PetscInt *ins_local_primal_indices; 2318 PetscMPIInt *localsizes2,*localdispl2; 2319 PetscMPIInt size_prec_comm; 2320 PetscMPIInt rank_prec_comm; 2321 PetscMPIInt active_rank=MPI_PROC_NULL; 2322 PetscMPIInt master_proc=0; 2323 PetscInt ins_local_primal_size; 2324 /* specific to MULTILEVEL_BDDC */ 2325 PetscMPIInt *ranks_recv=0; 2326 PetscMPIInt count_recv=0; 2327 PetscMPIInt rank_coarse_proc_send_to=-1; 2328 PetscMPIInt coarse_color = MPI_UNDEFINED; 2329 ISLocalToGlobalMapping coarse_ISLG; 2330 /* some other variables */ 2331 PetscErrorCode ierr; 2332 MatType coarse_mat_type; 2333 PCType coarse_pc_type; 2334 KSPType coarse_ksp_type; 2335 PC pc_temp; 2336 PetscInt i,j,k; 2337 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2338 /* verbose output viewer */ 2339 PetscViewer viewer=pcbddc->dbg_viewer; 2340 PetscInt dbg_flag=pcbddc->dbg_flag; 2341 2342 PetscInt offset,offset2; 2343 PetscMPIInt im_active,active_procs; 2344 PetscInt *dnz,*onz; 2345 2346 PetscBool setsym,issym=PETSC_FALSE; 2347 2348 PetscFunctionBegin; 2349 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2350 ins_local_primal_indices = 0; 2351 ins_coarse_mat_vals = 0; 2352 localsizes2 = 0; 2353 localdispl2 = 0; 2354 temp_coarse_mat_vals = 0; 2355 coarse_ISLG = 0; 2356 2357 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2358 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2359 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2360 2361 /* Assign global numbering to coarse dofs */ 2362 { 2363 PetscInt *auxlocal_primal,*aux_idx; 2364 PetscMPIInt mpi_local_primal_size; 2365 PetscScalar coarsesum,*array; 2366 2367 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2368 2369 /* Construct needed data structures for message passing */ 2370 j = 0; 2371 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2372 j = size_prec_comm; 2373 } 2374 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2375 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2376 /* Gather local_primal_size information for all processes */ 2377 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2378 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2379 } else { 2380 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2381 } 2382 pcbddc->replicated_primal_size = 0; 2383 for (i=0; i<j; i++) { 2384 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2385 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2386 } 2387 2388 /* This code fragment assumes that the number of local constraints per connected component 2389 is not greater than the number of nodes defined for the connected component 2390 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2391 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2392 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2393 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2394 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2395 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2396 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2397 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2398 /* Compute number of coarse dofs */ 2399 ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr); 2400 2401 if (dbg_flag) { 2402 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2403 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2404 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 2405 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2406 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2407 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 2408 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2409 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2410 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2411 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2412 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2413 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2414 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2415 for (i=0;i<pcis->n;i++) { 2416 if (array[i] == 1.0) { 2417 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 2418 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 2419 } 2420 } 2421 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2422 for (i=0;i<pcis->n;i++) { 2423 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2424 } 2425 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2426 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2427 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2428 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2429 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2430 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 2431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2432 } 2433 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2434 } 2435 2436 if (dbg_flag) { 2437 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); 2438 if (dbg_flag > 1) { 2439 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2440 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2441 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2442 for (i=0;i<pcbddc->local_primal_size;i++) { 2443 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 2444 } 2445 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2446 } 2447 } 2448 2449 im_active = 0; 2450 if (pcis->n) im_active = 1; 2451 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2452 2453 /* adapt coarse problem type */ 2454 #if defined(PETSC_HAVE_METIS) 2455 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2456 if (pcbddc->current_level < pcbddc->max_levels) { 2457 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2458 if (dbg_flag) { 2459 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2461 } 2462 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2463 } 2464 } else { 2465 if (dbg_flag) { 2466 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2467 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2468 } 2469 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2470 } 2471 } 2472 #else 2473 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2474 #endif 2475 2476 switch(pcbddc->coarse_problem_type){ 2477 2478 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2479 { 2480 #if defined(PETSC_HAVE_METIS) 2481 /* we need additional variables */ 2482 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2483 MetisInt *metis_coarse_subdivision; 2484 MetisInt options[METIS_NOPTIONS]; 2485 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2486 PetscMPIInt procs_jumps_coarse_comm; 2487 PetscMPIInt *coarse_subdivision; 2488 PetscMPIInt *total_count_recv; 2489 PetscMPIInt *total_ranks_recv; 2490 PetscMPIInt *displacements_recv; 2491 PetscMPIInt *my_faces_connectivity; 2492 PetscMPIInt *petsc_faces_adjncy; 2493 MetisInt *faces_adjncy; 2494 MetisInt *faces_xadj; 2495 PetscMPIInt *number_of_faces; 2496 PetscMPIInt *faces_displacements; 2497 PetscInt *array_int; 2498 PetscMPIInt my_faces=0; 2499 PetscMPIInt total_faces=0; 2500 PetscInt ranks_stretching_ratio; 2501 2502 /* define some quantities */ 2503 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2504 coarse_mat_type = MATIS; 2505 coarse_pc_type = PCBDDC; 2506 coarse_ksp_type = KSPRICHARDSON; 2507 2508 /* details of coarse decomposition */ 2509 n_subdomains = active_procs; 2510 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2511 ranks_stretching_ratio = size_prec_comm/active_procs; 2512 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2513 2514 #if 0 2515 PetscMPIInt *old_ranks; 2516 PetscInt *new_ranks,*jj,*ii; 2517 MatPartitioning mat_part; 2518 IS coarse_new_decomposition,is_numbering; 2519 PetscViewer viewer_test; 2520 MPI_Comm test_coarse_comm; 2521 PetscMPIInt test_coarse_color; 2522 Mat mat_adj; 2523 /* Create new communicator for coarse problem splitting the old one */ 2524 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2525 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2526 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2527 test_coarse_comm = MPI_COMM_NULL; 2528 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2529 if (im_active) { 2530 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2531 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2532 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2533 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2534 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2535 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2536 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2537 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2538 k = pcis->n_neigh-1; 2539 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2540 ii[0]=0; 2541 ii[1]=k; 2542 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2543 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2544 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2545 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2546 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2547 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2548 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2549 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2550 printf("Setting Nparts %d\n",n_parts); 2551 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2552 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2553 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2554 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2555 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2556 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2557 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2558 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2559 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2560 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2561 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2562 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2563 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2564 } 2565 #endif 2566 2567 /* build CSR graph of subdomains' connectivity */ 2568 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2569 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2570 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2571 for (j=0;j<pcis->n_shared[i];j++){ 2572 array_int[ pcis->shared[i][j] ]+=1; 2573 } 2574 } 2575 for (i=1;i<pcis->n_neigh;i++){ 2576 for (j=0;j<pcis->n_shared[i];j++){ 2577 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2578 my_faces++; 2579 break; 2580 } 2581 } 2582 } 2583 2584 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2585 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2586 my_faces=0; 2587 for (i=1;i<pcis->n_neigh;i++){ 2588 for (j=0;j<pcis->n_shared[i];j++){ 2589 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2590 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2591 my_faces++; 2592 break; 2593 } 2594 } 2595 } 2596 if (rank_prec_comm == master_proc) { 2597 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2598 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2599 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2600 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2601 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2602 } 2603 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2604 if (rank_prec_comm == master_proc) { 2605 faces_xadj[0]=0; 2606 faces_displacements[0]=0; 2607 j=0; 2608 for (i=1;i<size_prec_comm+1;i++) { 2609 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2610 if (number_of_faces[i-1]) { 2611 j++; 2612 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2613 } 2614 } 2615 } 2616 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2617 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2618 ierr = PetscFree(array_int);CHKERRQ(ierr); 2619 if (rank_prec_comm == master_proc) { 2620 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2621 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2622 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2623 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2624 } 2625 2626 if ( rank_prec_comm == master_proc ) { 2627 2628 PetscInt heuristic_for_metis=3; 2629 2630 ncon=1; 2631 faces_nvtxs=n_subdomains; 2632 /* partition graoh induced by face connectivity */ 2633 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2634 ierr = METIS_SetDefaultOptions(options); 2635 /* we need a contiguous partition of the coarse mesh */ 2636 options[METIS_OPTION_CONTIG]=1; 2637 options[METIS_OPTION_NITER]=30; 2638 if (pcbddc->coarsening_ratio > 1) { 2639 if (n_subdomains>n_parts*heuristic_for_metis) { 2640 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2641 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2642 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2643 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2644 } else { 2645 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2646 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2647 } 2648 } else { 2649 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2650 } 2651 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2652 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2653 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2654 2655 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2656 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2657 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2658 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2659 } 2660 2661 /* Create new communicator for coarse problem splitting the old one */ 2662 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2663 coarse_color=0; /* for communicator splitting */ 2664 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2665 } 2666 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2667 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2668 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2669 2670 if ( coarse_color == 0 ) { 2671 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2672 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2673 } else { 2674 rank_coarse_comm = MPI_PROC_NULL; 2675 } 2676 2677 /* master proc take care of arranging and distributing coarse information */ 2678 if (rank_coarse_comm == master_proc) { 2679 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2680 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2681 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2682 /* some initializations */ 2683 displacements_recv[0]=0; 2684 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2685 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2686 for (j=0;j<size_coarse_comm;j++) { 2687 for (i=0;i<size_prec_comm;i++) { 2688 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2689 } 2690 } 2691 /* displacements needed for scatterv of total_ranks_recv */ 2692 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2693 2694 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2695 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2696 for (j=0;j<size_coarse_comm;j++) { 2697 for (i=0;i<size_prec_comm;i++) { 2698 if (coarse_subdivision[i]==j) { 2699 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2700 total_count_recv[j]+=1; 2701 } 2702 } 2703 } 2704 /*for (j=0;j<size_coarse_comm;j++) { 2705 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2706 for (i=0;i<total_count_recv[j];i++) { 2707 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2708 } 2709 printf("\n"); 2710 }*/ 2711 2712 /* identify new decomposition in terms of ranks in the old communicator */ 2713 for (i=0;i<n_subdomains;i++) { 2714 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2715 } 2716 /*printf("coarse_subdivision in old end new ranks\n"); 2717 for (i=0;i<size_prec_comm;i++) 2718 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2719 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2720 } else { 2721 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2722 } 2723 printf("\n");*/ 2724 } 2725 2726 /* Scatter new decomposition for send details */ 2727 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2728 /* Scatter receiving details to members of coarse decomposition */ 2729 if ( coarse_color == 0) { 2730 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2731 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2732 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2733 } 2734 2735 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2736 if (coarse_color == 0) { 2737 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2738 for (i=0;i<count_recv;i++) 2739 printf("%d ",ranks_recv[i]); 2740 printf("\n"); 2741 }*/ 2742 2743 if (rank_prec_comm == master_proc) { 2744 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2745 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2746 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2747 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2748 } 2749 #endif 2750 break; 2751 } 2752 2753 case(REPLICATED_BDDC): 2754 2755 pcbddc->coarse_communications_type = GATHERS_BDDC; 2756 coarse_mat_type = MATSEQAIJ; 2757 coarse_pc_type = PCLU; 2758 coarse_ksp_type = KSPPREONLY; 2759 coarse_comm = PETSC_COMM_SELF; 2760 active_rank = rank_prec_comm; 2761 break; 2762 2763 case(PARALLEL_BDDC): 2764 2765 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2766 coarse_mat_type = MATAIJ; 2767 coarse_pc_type = PCREDUNDANT; 2768 coarse_ksp_type = KSPPREONLY; 2769 coarse_comm = prec_comm; 2770 active_rank = rank_prec_comm; 2771 break; 2772 2773 case(SEQUENTIAL_BDDC): 2774 pcbddc->coarse_communications_type = GATHERS_BDDC; 2775 coarse_mat_type = MATAIJ; 2776 coarse_pc_type = PCLU; 2777 coarse_ksp_type = KSPPREONLY; 2778 coarse_comm = PETSC_COMM_SELF; 2779 active_rank = master_proc; 2780 break; 2781 } 2782 2783 switch(pcbddc->coarse_communications_type){ 2784 2785 case(SCATTERS_BDDC): 2786 { 2787 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2788 2789 IS coarse_IS; 2790 2791 if(pcbddc->coarsening_ratio == 1) { 2792 ins_local_primal_size = pcbddc->local_primal_size; 2793 ins_local_primal_indices = pcbddc->local_primal_indices; 2794 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2795 /* nonzeros */ 2796 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2797 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2798 for (i=0;i<ins_local_primal_size;i++) { 2799 dnz[i] = ins_local_primal_size; 2800 } 2801 } else { 2802 PetscMPIInt send_size; 2803 PetscMPIInt *send_buffer; 2804 PetscInt *aux_ins_indices; 2805 PetscInt ii,jj; 2806 MPI_Request *requests; 2807 2808 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2809 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 2810 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 2811 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2812 pcbddc->replicated_primal_size = count_recv; 2813 j = 0; 2814 for (i=0;i<count_recv;i++) { 2815 pcbddc->local_primal_displacements[i] = j; 2816 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 2817 } 2818 pcbddc->local_primal_displacements[count_recv] = j; 2819 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2820 /* allocate auxiliary space */ 2821 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2822 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2823 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2824 /* allocate stuffs for message massing */ 2825 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2826 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2827 /* send indices to be inserted */ 2828 for (i=0;i<count_recv;i++) { 2829 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 2830 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2831 } 2832 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2833 send_size = pcbddc->local_primal_size; 2834 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2835 for (i=0;i<send_size;i++) { 2836 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2837 } 2838 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2839 } 2840 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2841 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2842 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2843 } 2844 j = 0; 2845 for (i=0;i<count_recv;i++) { 2846 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 2847 localsizes2[i] = ii*ii; 2848 localdispl2[i] = j; 2849 j += localsizes2[i]; 2850 jj = pcbddc->local_primal_displacements[i]; 2851 /* it counts the coarse subdomains sharing the coarse node */ 2852 for (k=0;k<ii;k++) { 2853 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 2854 } 2855 } 2856 /* temp_coarse_mat_vals used to store matrix values to be received */ 2857 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2858 /* evaluate how many values I will insert in coarse mat */ 2859 ins_local_primal_size = 0; 2860 for (i=0;i<coarse_size;i++) { 2861 if (aux_ins_indices[i]) { 2862 ins_local_primal_size++; 2863 } 2864 } 2865 /* evaluate indices I will insert in coarse mat */ 2866 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2867 j = 0; 2868 for(i=0;i<coarse_size;i++) { 2869 if(aux_ins_indices[i]) { 2870 ins_local_primal_indices[j] = i; 2871 j++; 2872 } 2873 } 2874 /* processes partecipating in coarse problem receive matrix data from their friends */ 2875 for (i=0;i<count_recv;i++) { 2876 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2877 } 2878 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2879 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2880 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2881 } 2882 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2883 /* nonzeros */ 2884 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2885 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2886 /* use aux_ins_indices to realize a global to local mapping */ 2887 j=0; 2888 for(i=0;i<coarse_size;i++){ 2889 if(aux_ins_indices[i]==0){ 2890 aux_ins_indices[i]=-1; 2891 } else { 2892 aux_ins_indices[i]=j; 2893 j++; 2894 } 2895 } 2896 for (i=0;i<count_recv;i++) { 2897 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 2898 for (k=0;k<j;k++) { 2899 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 2900 } 2901 } 2902 /* check */ 2903 for (i=0;i<ins_local_primal_size;i++) { 2904 if (dnz[i] > ins_local_primal_size) { 2905 dnz[i] = ins_local_primal_size; 2906 } 2907 } 2908 ierr = PetscFree(requests);CHKERRQ(ierr); 2909 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2910 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2911 } 2912 /* create local to global mapping needed by coarse MATIS */ 2913 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2914 coarse_comm = prec_comm; 2915 active_rank = rank_prec_comm; 2916 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2917 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2918 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2919 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2920 /* arrays for values insertion */ 2921 ins_local_primal_size = pcbddc->local_primal_size; 2922 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2923 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2924 for (j=0;j<ins_local_primal_size;j++){ 2925 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2926 for (i=0;i<ins_local_primal_size;i++) { 2927 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2928 } 2929 } 2930 } 2931 break; 2932 2933 } 2934 2935 case(GATHERS_BDDC): 2936 { 2937 2938 PetscMPIInt mysize,mysize2; 2939 PetscMPIInt *send_buffer; 2940 2941 if (rank_prec_comm==active_rank) { 2942 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2943 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 2944 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2945 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2946 /* arrays for values insertion */ 2947 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2948 localdispl2[0]=0; 2949 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2950 j=0; 2951 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2952 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2953 } 2954 2955 mysize=pcbddc->local_primal_size; 2956 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2957 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2958 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2959 2960 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2961 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2962 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2963 } else { 2964 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2965 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2966 } 2967 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2968 break; 2969 }/* switch on coarse problem and communications associated with finished */ 2970 } 2971 2972 /* Now create and fill up coarse matrix */ 2973 if ( rank_prec_comm == active_rank ) { 2974 2975 Mat matis_coarse_local_mat; 2976 2977 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2978 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2979 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr); 2980 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2981 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2982 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2983 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2984 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2985 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2986 } else { 2987 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2988 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2989 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2990 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2991 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2992 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2993 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2994 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2995 } 2996 /* preallocation */ 2997 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2998 2999 PetscInt lrows,lcols,bs; 3000 3001 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 3002 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 3003 ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr); 3004 3005 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3006 3007 Vec vec_dnz,vec_onz; 3008 PetscScalar *my_dnz,*my_onz,*array; 3009 PetscInt *mat_ranges,*row_ownership; 3010 PetscInt coarse_index_row,coarse_index_col,owner; 3011 3012 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 3013 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 3014 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr); 3015 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 3016 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 3017 3018 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 3019 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 3020 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3021 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3022 3023 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 3024 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 3025 for (i=0;i<size_prec_comm;i++) { 3026 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 3027 row_ownership[j]=i; 3028 } 3029 } 3030 3031 for (i=0;i<pcbddc->local_primal_size;i++) { 3032 coarse_index_row = pcbddc->local_primal_indices[i]; 3033 owner = row_ownership[coarse_index_row]; 3034 for (j=i;j<pcbddc->local_primal_size;j++) { 3035 owner = row_ownership[coarse_index_row]; 3036 coarse_index_col = pcbddc->local_primal_indices[j]; 3037 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 3038 my_dnz[i] += 1.0; 3039 } else { 3040 my_onz[i] += 1.0; 3041 } 3042 if (i != j) { 3043 owner = row_ownership[coarse_index_col]; 3044 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 3045 my_dnz[j] += 1.0; 3046 } else { 3047 my_onz[j] += 1.0; 3048 } 3049 } 3050 } 3051 } 3052 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 3053 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 3054 if (pcbddc->local_primal_size) { 3055 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 3056 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 3057 } 3058 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 3059 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 3060 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 3061 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 3062 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 3063 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 3064 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 3065 3066 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 3067 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 3068 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 3069 3070 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 3071 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 3072 ierr = PetscFree(my_onz);CHKERRQ(ierr); 3073 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 3074 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 3075 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 3076 } else { 3077 for (k=0;k<size_prec_comm;k++){ 3078 offset=pcbddc->local_primal_displacements[k]; 3079 offset2=localdispl2[k]; 3080 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3081 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3082 for (j=0;j<ins_local_primal_size;j++){ 3083 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3084 } 3085 for (j=0;j<ins_local_primal_size;j++) { 3086 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 3087 } 3088 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3089 } 3090 } 3091 3092 /* check */ 3093 for (i=0;i<lrows;i++) { 3094 if (dnz[i]>lcols) dnz[i]=lcols; 3095 if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols; 3096 } 3097 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 3098 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 3099 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3100 } else { 3101 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 3102 ierr = PetscFree(dnz);CHKERRQ(ierr); 3103 } 3104 /* insert values */ 3105 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3106 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3107 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3108 if (pcbddc->coarsening_ratio == 1) { 3109 ins_coarse_mat_vals = coarse_submat_vals; 3110 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 3111 } else { 3112 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3113 for (k=0;k<pcbddc->replicated_primal_size;k++) { 3114 offset = pcbddc->local_primal_displacements[k]; 3115 offset2 = localdispl2[k]; 3116 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 3117 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3118 for (j=0;j<ins_local_primal_size;j++){ 3119 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3120 } 3121 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3122 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3123 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3124 } 3125 } 3126 ins_local_primal_indices = 0; 3127 ins_coarse_mat_vals = 0; 3128 } else { 3129 for (k=0;k<size_prec_comm;k++){ 3130 offset=pcbddc->local_primal_displacements[k]; 3131 offset2=localdispl2[k]; 3132 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3133 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3134 for (j=0;j<ins_local_primal_size;j++){ 3135 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3136 } 3137 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3138 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3139 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3140 } 3141 ins_local_primal_indices = 0; 3142 ins_coarse_mat_vals = 0; 3143 } 3144 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3145 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3146 /* symmetry of coarse matrix */ 3147 if (issym) { 3148 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3149 } 3150 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3151 } 3152 3153 /* create loc to glob scatters if needed */ 3154 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3155 IS local_IS,global_IS; 3156 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3157 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3158 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3159 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3160 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3161 } 3162 3163 /* free memory no longer needed */ 3164 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3165 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3166 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 3167 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 3168 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 3169 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 3170 3171 /* Compute coarse null space */ 3172 CoarseNullSpace = 0; 3173 if (pcbddc->NullSpace) { 3174 ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr); 3175 } 3176 3177 /* KSP for coarse problem */ 3178 if (rank_prec_comm == active_rank) { 3179 PetscBool isbddc=PETSC_FALSE; 3180 3181 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3182 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3183 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3184 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3185 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3186 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3187 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3188 /* Allow user's customization */ 3189 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3190 /* Set Up PC for coarse problem BDDC */ 3191 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3192 i = pcbddc->current_level+1; 3193 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3194 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3195 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3196 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3197 if (CoarseNullSpace) { 3198 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3199 } 3200 if (dbg_flag) { 3201 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3202 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3203 } 3204 } else { 3205 if (CoarseNullSpace) { 3206 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3207 } 3208 } 3209 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3210 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3211 3212 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3213 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3214 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3215 if (j == 1) { 3216 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3217 if (isbddc) { 3218 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3219 } 3220 } 3221 } 3222 /* Check coarse problem if requested */ 3223 if ( dbg_flag && rank_prec_comm == active_rank ) { 3224 KSP check_ksp; 3225 PC check_pc; 3226 Vec check_vec; 3227 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3228 KSPType check_ksp_type; 3229 3230 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3231 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3232 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3233 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3234 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3235 if (issym) check_ksp_type = KSPCG; 3236 else check_ksp_type = KSPGMRES; 3237 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3238 } else { 3239 check_ksp_type = KSPPREONLY; 3240 } 3241 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3242 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3243 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3244 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3245 /* create random vec */ 3246 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3247 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3248 if (CoarseNullSpace) { 3249 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3250 } 3251 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3252 /* solve coarse problem */ 3253 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3254 if (CoarseNullSpace) { 3255 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3256 } 3257 /* check coarse problem residual error */ 3258 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3259 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3260 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3261 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3262 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3263 /* get eigenvalue estimation if inexact */ 3264 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3265 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3266 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3267 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3268 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3269 } 3270 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3271 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3272 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3273 } 3274 if (dbg_flag) { 3275 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3276 } 3277 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3278 3279 PetscFunctionReturn(0); 3280 } 3281 3282