1 /* TODOLIST 2 allow passing array of IS to identify dof splitting per node (see which_dof array) 3 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 4 - mind the problem with coarsening_factor 5 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 6 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 7 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 8 - Add level slot to bddc data structure and associated Set/Get functions 9 code refactoring: 10 - removing indices to constraints, quadrature constrainsts and similar from PCBDDCManageLocalBoundaries 11 - analyze MatSetNearNullSpace as suggested by Jed 12 - build constraint matrix after SVD on local connected components 13 - pick up better names for static functions 14 check log_summary for leaking (actually: 1 Vector per level plus only 1 IS) 15 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 remove // commments and adhere to PETSc code requirements 21 man pages 22 */ 23 24 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 25 Implementation of BDDC preconditioner based on: 26 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 27 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 28 29 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 30 31 /* -------------------------------------------------------------------------- */ 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCSetFromOptions_BDDC" 34 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 35 { 36 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 37 PetscErrorCode ierr; 38 39 PetscFunctionBegin; 40 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 41 /* Verbose debugging of main data structures */ 42 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 43 /* Some customization for default primal space */ 44 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 48 /* Coarse solver context */ 49 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 50 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 51 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 52 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsTail();CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 /* -------------------------------------------------------------------------- */ 59 EXTERN_C_BEGIN 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 62 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 63 { 64 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 65 66 PetscFunctionBegin; 67 pcbddc->coarse_problem_type = CPT; 68 PetscFunctionReturn(0); 69 } 70 EXTERN_C_END 71 72 /* -------------------------------------------------------------------------- */ 73 #undef __FUNCT__ 74 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 75 /*@ 76 PCBDDCSetCoarseProblemType - brief explanation 77 78 Collective on PC 79 80 Input Parameters: 81 + pc - the preconditioning context 82 - CoarseProblemType - pick a better name and explain what this is 83 84 Level: intermediate 85 86 Notes: 87 usage notes, perhaps an example 88 89 .seealso: PCBDDC 90 @*/ 91 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 92 { 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 97 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 /* -------------------------------------------------------------------------- */ 102 EXTERN_C_BEGIN 103 #undef __FUNCT__ 104 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 105 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 106 { 107 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 113 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 114 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 /* -------------------------------------------------------------------------- */ 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 122 /*@ 123 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 124 Neumann boundaries for the global problem. 125 126 Logically Collective on PC 127 128 Input Parameters: 129 + pc - the preconditioning context 130 - NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 131 132 Level: intermediate 133 134 Notes: 135 usage notes, perhaps an example 136 137 .seealso: PCBDDC 138 @*/ 139 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 140 { 141 PetscErrorCode ierr; 142 143 PetscFunctionBegin; 144 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 145 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 /* -------------------------------------------------------------------------- */ 149 EXTERN_C_BEGIN 150 #undef __FUNCT__ 151 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 152 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 153 { 154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 155 156 PetscFunctionBegin; 157 if(pcbddc->NeumannBoundaries) { 158 *NeumannBoundaries = pcbddc->NeumannBoundaries; 159 } else { 160 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 161 } 162 PetscFunctionReturn(0); 163 } 164 EXTERN_C_END 165 166 /* -------------------------------------------------------------------------- */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 169 /*@ 170 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 171 Neumann boundaries for the global problem. 172 173 Logically Collective on PC 174 175 Input Parameters: 176 + pc - the preconditioning context 177 178 Output Parameters: 179 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 180 181 Level: intermediate 182 183 Notes: 184 usage notes, perhaps an example 185 186 .seealso: PCBDDC 187 @*/ 188 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 189 { 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 194 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "PCSetUp_BDDC" 200 /* -------------------------------------------------------------------------- */ 201 /* 202 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 203 by setting data structures and options. 204 205 Input Parameter: 206 + pc - the preconditioner context 207 208 Application Interface Routine: PCSetUp() 209 210 Notes: 211 The interface routine PCSetUp() is not usually called directly by 212 the user, but instead is called by PCApply() if necessary. 213 */ 214 PetscErrorCode PCSetUp_BDDC(PC pc) 215 { 216 PetscErrorCode ierr; 217 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 218 PC_IS *pcis = (PC_IS*)(pc->data); 219 220 PetscFunctionBegin; 221 if (!pc->setupcalled) { 222 223 /* For BDDC we need to define a local "Neumann" to that defined in PCISSetup 224 So, we set to pcnone the Neumann problem of pcid in order to avoid unneeded computation 225 Also, we decide to directly build the (same) Dirichlet problem */ 226 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 227 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 228 /* Set up all the "iterative substructuring" common block */ 229 ierr = PCISSetUp(pc);CHKERRQ(ierr); 230 /* Destroy some PC_IS data which is not needed by BDDC */ 231 //if (pcis->ksp_D) {ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);} 232 //if (pcis->ksp_N) {ierr = KSPDestroy(&pcis->ksp_N);CHKERRQ(ierr);} 233 //if (pcis->vec2_B) {ierr = VecDestroy(&pcis->vec2_B);CHKERRQ(ierr);} 234 //if (pcis->vec3_B) {ierr = VecDestroy(&pcis->vec3_B);CHKERRQ(ierr);} 235 //pcis->ksp_D = 0; 236 //pcis->ksp_N = 0; 237 //pcis->vec2_B = 0; 238 //pcis->vec3_B = 0; 239 if(pcbddc->dbg_flag) { 240 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 241 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 242 } 243 244 //TODO MOVE CODE FRAGMENT 245 PetscInt im_active=0; 246 if(pcis->n) im_active = 1; 247 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 248 //printf("Calling PCBDDC MANAGE with active procs %d (im_active = %d)\n",pcbddc->active_procs,im_active); 249 /* Set up grid quantities for BDDC */ 250 //TODO only active procs must call this -> remove synchronized print inside (the only point of sync) 251 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 252 253 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 254 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 255 256 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; //processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo 257 /* We no more need this matrix */ 258 //if (pcis->A_BB) {ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);} 259 //pcis->A_BB = 0; 260 261 //printf("Using coarse problem type %d\n",pcbddc->coarse_problem_type); 262 //printf("Using coarse communications type %d\n",pcbddc->coarse_communications_type); 263 //printf("Using coarsening ratio %d\n",pcbddc->coarsening_ratio); 264 } 265 266 PetscFunctionReturn(0); 267 } 268 269 /* -------------------------------------------------------------------------- */ 270 /* 271 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 272 273 Input Parameters: 274 . pc - the preconditioner context 275 . r - input vector (global) 276 277 Output Parameter: 278 . z - output vector (global) 279 280 Application Interface Routine: PCApply() 281 */ 282 #undef __FUNCT__ 283 #define __FUNCT__ "PCApply_BDDC" 284 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 285 { 286 PC_IS *pcis = (PC_IS*)(pc->data); 287 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 288 PetscErrorCode ierr; 289 PetscScalar one = 1.0; 290 PetscScalar m_one = -1.0; 291 292 /* This code is similar to that provided in nn.c for PCNN 293 NN interface preconditioner changed to BDDC 294 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 295 296 PetscFunctionBegin; 297 /* First Dirichlet solve */ 298 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 299 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 300 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 301 /* 302 Assembling right hand side for BDDC operator 303 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 304 - the interface part of the global vector z 305 */ 306 //ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 307 //ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 308 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 309 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 310 //ierr = MatMultAdd(pcis->A_BI,pcis->vec2_D,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 311 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 312 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 313 ierr = VecCopy(r,z);CHKERRQ(ierr); 314 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 315 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 316 317 /* 318 Apply interface preconditioner 319 Results are stored in: 320 - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE) 321 - the interface part of the global vector z 322 */ 323 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 324 325 /* Second Dirichlet solves and assembling of output */ 326 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 327 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 328 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 329 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 330 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 331 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 332 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 333 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 334 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 335 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 336 337 PetscFunctionReturn(0); 338 339 } 340 341 /* -------------------------------------------------------------------------- */ 342 /* 343 PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 344 345 */ 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 348 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 349 { 350 PetscErrorCode ierr; 351 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 352 PC_IS* pcis = (PC_IS*) (pc->data); 353 PetscScalar zero = 0.0; 354 355 PetscFunctionBegin; 356 /* Get Local boundary and apply partition of unity */ 357 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 358 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 359 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 360 361 /* Application of PHI^T */ 362 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 363 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 364 365 /* Scatter data of coarse_rhs */ 366 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 367 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 368 369 /* Local solution on R nodes */ 370 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 371 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 372 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 373 if(pcbddc->prec_type) { 374 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 375 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 376 } 377 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 378 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 379 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 380 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 381 if(pcbddc->prec_type) { 382 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 383 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 384 } 385 386 /* Coarse solution */ 387 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 388 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 389 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 390 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 391 392 /* Sum contributions from two levels */ 393 /* Apply partition of unity and sum boundary values */ 394 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 395 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 396 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 397 ierr = VecSet(z,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 399 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 400 401 PetscFunctionReturn(0); 402 } 403 404 405 /* -------------------------------------------------------------------------- */ 406 /* 407 PCBDDCSolveSaddlePoint 408 409 */ 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 412 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 413 { 414 PetscErrorCode ierr; 415 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 416 417 PetscFunctionBegin; 418 419 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 420 if(pcbddc->n_constraints) { 421 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 422 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 423 } 424 425 PetscFunctionReturn(0); 426 } 427 /* -------------------------------------------------------------------------- */ 428 /* 429 PCBDDCScatterCoarseDataBegin 430 431 */ 432 #undef __FUNCT__ 433 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 434 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 435 { 436 PetscErrorCode ierr; 437 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 438 439 PetscFunctionBegin; 440 441 switch(pcbddc->coarse_communications_type){ 442 case SCATTERS_BDDC: 443 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 444 break; 445 case GATHERS_BDDC: 446 break; 447 } 448 PetscFunctionReturn(0); 449 450 } 451 /* -------------------------------------------------------------------------- */ 452 /* 453 PCBDDCScatterCoarseDataEnd 454 455 */ 456 #undef __FUNCT__ 457 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 458 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 459 { 460 PetscErrorCode ierr; 461 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 462 PetscScalar* array_to; 463 PetscScalar* array_from; 464 MPI_Comm comm=((PetscObject)pc)->comm; 465 PetscInt i; 466 467 PetscFunctionBegin; 468 469 switch(pcbddc->coarse_communications_type){ 470 case SCATTERS_BDDC: 471 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 472 break; 473 case GATHERS_BDDC: 474 if(vec_from) VecGetArray(vec_from,&array_from); 475 if(vec_to) VecGetArray(vec_to,&array_to); 476 switch(pcbddc->coarse_problem_type){ 477 case SEQUENTIAL_BDDC: 478 if(smode == SCATTER_FORWARD) { 479 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 480 if(vec_to) { 481 for(i=0;i<pcbddc->replicated_primal_size;i++) 482 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 483 } 484 } else { 485 if(vec_from) 486 for(i=0;i<pcbddc->replicated_primal_size;i++) 487 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 488 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 489 } 490 break; 491 case REPLICATED_BDDC: 492 if(smode == SCATTER_FORWARD) { 493 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 494 for(i=0;i<pcbddc->replicated_primal_size;i++) 495 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 496 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 497 for(i=0;i<pcbddc->local_primal_size;i++) 498 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 499 } 500 break; 501 case MULTILEVEL_BDDC: 502 break; 503 case PARALLEL_BDDC: 504 break; 505 } 506 if(vec_from) VecRestoreArray(vec_from,&array_from); 507 if(vec_to) VecRestoreArray(vec_to,&array_to); 508 break; 509 } 510 PetscFunctionReturn(0); 511 512 } 513 514 /* -------------------------------------------------------------------------- */ 515 /* 516 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 517 that was created with PCCreate_BDDC(). 518 519 Input Parameter: 520 . pc - the preconditioner context 521 522 Application Interface Routine: PCDestroy() 523 */ 524 #undef __FUNCT__ 525 #define __FUNCT__ "PCDestroy_BDDC" 526 PetscErrorCode PCDestroy_BDDC(PC pc) 527 { 528 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 529 PetscErrorCode ierr; 530 531 PetscFunctionBegin; 532 /* free data created by PCIS */ 533 ierr = PCISDestroy(pc);CHKERRQ(ierr); 534 /* free BDDC data */ 535 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 536 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 537 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 538 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 539 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 540 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 541 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 543 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 544 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 545 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 546 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 547 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 548 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 549 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 550 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 551 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 552 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 553 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 554 ierr = PetscFree(pcbddc->vertices);CHKERRQ(ierr); 555 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 556 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 557 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 558 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 559 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 560 if (pcbddc->n_constraints) { 561 ierr = PetscFree(pcbddc->indices_to_constraint[0]);CHKERRQ(ierr); 562 ierr = PetscFree(pcbddc->indices_to_constraint);CHKERRQ(ierr); 563 ierr = PetscFree(pcbddc->quadrature_constraint[0]);CHKERRQ(ierr); 564 ierr = PetscFree(pcbddc->quadrature_constraint);CHKERRQ(ierr); 565 ierr = PetscFree(pcbddc->sizes_of_constraint);CHKERRQ(ierr); 566 } 567 /* Free the private data structure that was hanging off the PC */ 568 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 /* -------------------------------------------------------------------------- */ 573 /*MC 574 PCBDDC - Balancing Domain Decomposition by Constraints. 575 576 Options Database Keys: 577 . -pcbddc ??? - 578 579 Level: intermediate 580 581 Notes: The matrix used with this preconditioner must be of type MATIS 582 583 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 584 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 585 on the subdomains). 586 587 Options for the coarse grid preconditioner can be set with - 588 Options for the Dirichlet subproblem can be set with - 589 Options for the Neumann subproblem can be set with - 590 591 Contributed by Stefano Zampini 592 593 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 594 M*/ 595 EXTERN_C_BEGIN 596 #undef __FUNCT__ 597 #define __FUNCT__ "PCCreate_BDDC" 598 PetscErrorCode PCCreate_BDDC(PC pc) 599 { 600 PetscErrorCode ierr; 601 PC_BDDC *pcbddc; 602 603 PetscFunctionBegin; 604 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 605 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 606 pc->data = (void*)pcbddc; 607 /* create PCIS data structure */ 608 ierr = PCISCreate(pc);CHKERRQ(ierr); 609 /* BDDC specific */ 610 pcbddc->coarse_vec = 0; 611 pcbddc->coarse_rhs = 0; 612 pcbddc->coarse_ksp = 0; 613 pcbddc->coarse_phi_B = 0; 614 pcbddc->coarse_phi_D = 0; 615 pcbddc->vec1_P = 0; 616 pcbddc->vec1_R = 0; 617 pcbddc->vec2_R = 0; 618 pcbddc->local_auxmat1 = 0; 619 pcbddc->local_auxmat2 = 0; 620 pcbddc->R_to_B = 0; 621 pcbddc->R_to_D = 0; 622 pcbddc->ksp_D = 0; 623 pcbddc->ksp_R = 0; 624 pcbddc->n_constraints = 0; 625 pcbddc->n_vertices = 0; 626 pcbddc->vertices = 0; 627 pcbddc->indices_to_constraint = 0; 628 pcbddc->quadrature_constraint = 0; 629 pcbddc->sizes_of_constraint = 0; 630 pcbddc->local_primal_indices = 0; 631 pcbddc->prec_type = PETSC_FALSE; 632 pcbddc->NeumannBoundaries = 0; 633 pcbddc->local_primal_sizes = 0; 634 pcbddc->local_primal_displacements = 0; 635 pcbddc->replicated_local_primal_indices = 0; 636 pcbddc->replicated_local_primal_values = 0; 637 pcbddc->coarse_loc_to_glob = 0; 638 pcbddc->dbg_flag = PETSC_FALSE; 639 pcbddc->vertices_flag = PETSC_FALSE; 640 pcbddc->constraints_flag = PETSC_FALSE; 641 pcbddc->faces_flag = PETSC_FALSE; 642 pcbddc->edges_flag = PETSC_FALSE; 643 pcbddc->coarsening_ratio = 8; 644 /* function pointers */ 645 pc->ops->apply = PCApply_BDDC; 646 pc->ops->applytranspose = 0; 647 pc->ops->setup = PCSetUp_BDDC; 648 pc->ops->destroy = PCDestroy_BDDC; 649 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 650 pc->ops->view = 0; 651 pc->ops->applyrichardson = 0; 652 pc->ops->applysymmetricleft = 0; 653 pc->ops->applysymmetricright = 0; 654 /* composing function */ 655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 656 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 658 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 660 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 EXTERN_C_END 664 665 /* -------------------------------------------------------------------------- */ 666 /* 667 PCBDDCCoarseSetUp - 668 */ 669 #undef __FUNCT__ 670 #define __FUNCT__ "PCBDDCCoarseSetUp" 671 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 672 { 673 PetscErrorCode ierr; 674 675 PC_IS* pcis = (PC_IS*)(pc->data); 676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 677 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 678 IS is_R_local; 679 IS is_V_local; 680 IS is_C_local; 681 IS is_aux1; 682 IS is_aux2; 683 const VecType impVecType; 684 const MatType impMatType; 685 PetscInt n_R=0; 686 PetscInt n_D=0; 687 PetscInt n_B=0; 688 PetscMPIInt totprocs; 689 PetscScalar zero=0.0; 690 PetscScalar one=1.0; 691 PetscScalar m_one=-1.0; 692 PetscScalar* array; 693 PetscScalar *coarse_submat_vals; 694 PetscInt *idx_R_local; 695 PetscInt *idx_V_B; 696 PetscScalar *coarsefunctions_errors; 697 PetscScalar *constraints_errors; 698 /* auxiliary indices */ 699 PetscInt s,i,j,k; 700 /* for verbose output of bddc */ 701 PetscViewer viewer=pcbddc->dbg_viewer; 702 PetscBool dbg_flag=pcbddc->dbg_flag; 703 /* for counting coarse dofs */ 704 PetscScalar coarsesum; 705 706 PetscFunctionBegin; 707 /* Set Non-overlapping dimensions */ 708 n_B = pcis->n_B; n_D = pcis->n - n_B; 709 /* Set local primal size */ 710 pcbddc->local_primal_size = pcbddc->n_vertices + pcbddc->n_constraints; 711 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 712 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 713 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 714 for(i=0;i<pcbddc->n_vertices;i++) { array[ pcbddc->vertices[i] ] = one; } 715 for(i=0;i<pcbddc->n_constraints;i++) { 716 for (j=0; j<pcbddc->sizes_of_constraint[i]; j++) { 717 k = pcbddc->indices_to_constraint[i][j]; 718 if( array[k] == zero ) { 719 array[k] = one; 720 break; 721 } 722 } 723 } 724 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 725 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 726 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 727 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 728 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 730 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 731 for(i=0;i<pcis->n;i++) { if(array[i] > zero) array[i] = one/array[i]; } 732 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 733 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 734 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 735 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 736 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 737 pcbddc->coarse_size = (PetscInt) coarsesum; 738 739 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 740 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 741 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 742 for (i=0;i<pcbddc->n_vertices;i++) { array[ pcbddc->vertices[i] ] = zero; } 743 ierr = PetscMalloc(( pcis->n - pcbddc->n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 744 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 745 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 746 if(dbg_flag) { 747 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 748 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 749 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 750 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 751 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,pcbddc->n_vertices,pcbddc->n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 752 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 753 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 754 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 755 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 756 } 757 /* Allocate needed vectors */ 758 /* Set Mat type for local matrices needed by BDDC precondtioner */ 759 impMatType = MATSEQDENSE; 760 impVecType = VECSEQ; 761 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 762 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 763 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 764 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 765 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 766 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 767 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 768 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 769 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 770 771 /* Creating some index sets needed */ 772 /* For submatrices */ 773 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 774 if(pcbddc->n_vertices) { ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->vertices,PETSC_COPY_VALUES,&is_V_local);CHKERRQ(ierr); } 775 if(pcbddc->n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,pcbddc->n_constraints,pcbddc->n_vertices,1,&is_C_local);CHKERRQ(ierr); } 776 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 777 { 778 PetscInt *aux_array1; 779 PetscInt *aux_array2; 780 PetscScalar value; 781 782 ierr = PetscMalloc( (pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 783 ierr = PetscMalloc( (pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 784 785 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 786 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 787 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 788 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 793 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 794 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 795 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 796 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 797 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 798 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 799 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 800 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 801 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 802 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 803 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 804 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 805 806 if(pcbddc->prec_type || dbg_flag ) { 807 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 808 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 809 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 810 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 811 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 812 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 813 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 814 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 815 } 816 817 /* Check scatters */ 818 if(dbg_flag) { 819 820 Vec vec_aux; 821 822 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 823 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 824 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 825 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 826 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 827 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 828 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 829 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 830 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 832 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 833 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 834 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 835 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 836 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 837 838 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 839 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 840 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 841 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 843 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 844 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 845 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 846 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 847 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 848 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 849 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 850 851 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 852 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 853 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 854 855 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 856 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 857 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 858 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 859 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 862 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 864 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 865 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 866 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 867 868 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 869 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 870 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 871 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 872 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 873 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 874 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 877 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 879 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 880 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 881 882 } 883 } 884 885 /* vertices in boundary numbering */ 886 if(pcbddc->n_vertices) { 887 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 888 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 889 for (i=0; i<pcbddc->n_vertices; i++) { array[ pcbddc->vertices[i] ] = i; } 890 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 891 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = PetscMalloc(pcbddc->n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 894 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 895 //printf("vertices in boundary numbering\n"); 896 for (i=0; i<pcbddc->n_vertices; i++) { 897 s=0; 898 while (array[s] != i ) {s++;} 899 idx_V_B[i]=s; 900 //printf("idx_V_B[%d]=%d\n",i,s); 901 } 902 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 903 } 904 905 906 /* Creating PC contexts for local Dirichlet and Neumann problems */ 907 { 908 Mat A_RR; 909 PC pc_temp; 910 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 911 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 912 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 913 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 914 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 915 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 916 /* default */ 917 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 918 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 919 /* Allow user's customization */ 920 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 921 /* Set Up KSP for Dirichlet problem of BDDC */ 922 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 923 /* Matrix for Neumann problem is A_RR -> we need to create it */ 924 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 925 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 926 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 927 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 928 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 929 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 930 /* default */ 931 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 932 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 933 /* Allow user's customization */ 934 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 935 /* Set Up KSP for Neumann problem of BDDC */ 936 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 937 /* check Dirichlet and Neumann solvers */ 938 if(pcbddc->dbg_flag) { 939 Vec temp_vec; 940 PetscScalar value; 941 942 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 943 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 944 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 945 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 946 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 947 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 948 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 949 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 950 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 951 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 953 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 954 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 955 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 956 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 957 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 958 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 959 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 961 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 962 } 963 /* free Neumann problem's matrix */ 964 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 965 } 966 967 /* Assemble all remaining stuff needed to apply BDDC */ 968 { 969 Mat A_RV,A_VR,A_VV; 970 Mat M1,M2; 971 Mat C_CR; 972 Mat CMAT,AUXMAT; 973 Vec vec1_C; 974 Vec vec2_C; 975 Vec vec1_V; 976 Vec vec2_V; 977 PetscInt *nnz; 978 PetscInt *auxindices; 979 PetscInt index; 980 PetscScalar* array2; 981 MatFactorInfo matinfo; 982 983 /* Allocating some extra storage just to be safe */ 984 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 985 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 986 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 987 988 /* some work vectors on vertices and/or constraints */ 989 if(pcbddc->n_vertices) { 990 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 991 ierr = VecSetSizes(vec1_V,pcbddc->n_vertices,pcbddc->n_vertices);CHKERRQ(ierr); 992 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 993 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 994 } 995 if(pcbddc->n_constraints) { 996 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 997 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 998 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 999 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1000 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1001 } 1002 /* Create C matrix [I 0; 0 const] */ 1003 ierr = MatCreate(PETSC_COMM_SELF,&CMAT);CHKERRQ(ierr); 1004 ierr = MatSetType(CMAT,MATSEQAIJ);CHKERRQ(ierr); 1005 ierr = MatSetSizes(CMAT,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1006 /* nonzeros */ 1007 for(i=0;i<pcbddc->n_vertices;i++) { nnz[i]= 1; } 1008 for(i=0;i<pcbddc->n_constraints;i++) { nnz[i+pcbddc->n_vertices]=pcbddc->sizes_of_constraint[i];} 1009 ierr = MatSeqAIJSetPreallocation(CMAT,0,nnz);CHKERRQ(ierr); 1010 for(i=0;i<pcbddc->n_vertices;i++) { 1011 ierr = MatSetValue(CMAT,i,pcbddc->vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1012 } 1013 for(i=0;i<pcbddc->n_constraints;i++) { 1014 index=i+pcbddc->n_vertices; 1015 ierr = MatSetValues(CMAT,1,&index,pcbddc->sizes_of_constraint[i],pcbddc->indices_to_constraint[i],pcbddc->quadrature_constraint[i],INSERT_VALUES);CHKERRQ(ierr); 1016 } 1017 //if(dbg_flag) printf("CMAT assembling\n"); 1018 ierr = MatAssemblyBegin(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1019 ierr = MatAssemblyEnd(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1020 //ierr = MatView(CMAT,PETSC_VIEWER_STDOUT_SELF); 1021 1022 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1023 1024 if(pcbddc->n_constraints) { 1025 /* some work vectors */ 1026 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1027 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,pcbddc->n_constraints,n_R,pcbddc->n_constraints);CHKERRQ(ierr); 1028 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1029 1030 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1031 for(i=0;i<pcbddc->n_constraints;i++) { 1032 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1033 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1034 for(j=0;j<pcbddc->sizes_of_constraint[i];j++) { array[ pcbddc->indices_to_constraint[i][j] ] = - pcbddc->quadrature_constraint[i][j]; } 1035 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1036 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1037 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1038 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1039 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1040 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1041 index=i; 1042 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1043 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1044 } 1045 //if(dbg_flag) printf("pcbddc->local_auxmat2 assembling\n"); 1046 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1047 ierr = MatAssemblyEnd (pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1048 1049 /* Create Constraint matrix on R nodes: C_{CR} */ 1050 ierr = MatGetSubMatrix(CMAT,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1051 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1052 1053 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1054 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1056 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1057 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1058 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1059 1060 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc (should be dense) */ 1061 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1062 ierr = MatSetSizes(M1,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1063 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1064 for(i=0;i<pcbddc->n_constraints;i++) { 1065 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1066 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1067 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1068 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1069 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1070 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1071 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1072 index=i; 1073 ierr = MatSetValues(M1,pcbddc->n_constraints,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1074 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1075 } 1076 //if(dbg_flag) printf("M1 assembling\n"); 1077 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1078 ierr = MatAssemblyEnd (M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1080 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1081 //if(dbg_flag) printf("pcbddc->local_auxmat1 computing and assembling\n"); 1082 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1083 1084 } 1085 1086 /* Get submatrices from subdomain matrix */ 1087 if(pcbddc->n_vertices){ 1088 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1089 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1090 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1091 /* Assemble M2 = A_RR^{-1}A_RV */ 1092 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1093 ierr = MatSetSizes(M2,n_R,pcbddc->n_vertices,n_R,pcbddc->n_vertices);CHKERRQ(ierr); 1094 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1095 for(i=0;i<pcbddc->n_vertices;i++) { 1096 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1097 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1098 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1099 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1100 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1101 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1102 index=i; 1103 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1104 ierr = MatSetValues(M2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1105 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1106 } 1107 //if(dbg_flag) printf("M2 assembling\n"); 1108 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1109 ierr = MatAssemblyEnd (M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1110 } 1111 1112 /* Matrix of coarse basis functions (local) */ 1113 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1114 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1115 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1116 if(pcbddc->prec_type || dbg_flag ) { 1117 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1118 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1119 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1120 } 1121 1122 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1123 if(dbg_flag) { 1124 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1125 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1126 } 1127 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1128 1129 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1130 for(i=0;i<pcbddc->n_vertices;i++){ 1131 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1132 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1133 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1134 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1135 /* solution of saddle point problem */ 1136 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1137 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1138 if(pcbddc->n_constraints) { 1139 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1140 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1141 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1142 } 1143 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1144 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1145 1146 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1147 /* coarse basis functions */ 1148 index=i; 1149 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1150 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1151 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1153 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1154 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1155 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1156 if( pcbddc->prec_type || dbg_flag ) { 1157 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1160 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1161 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1162 } 1163 /* subdomain contribution to coarse matrix */ 1164 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1165 for(j=0;j<pcbddc->n_vertices;j++) {coarse_submat_vals[i*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1166 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1167 if( pcbddc->n_constraints) { 1168 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1169 for(j=0;j<pcbddc->n_constraints;j++) {coarse_submat_vals[i*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j];} //WARNING -> column major ordering 1170 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1171 } 1172 1173 if( dbg_flag ) { 1174 /* assemble subdomain vector on nodes */ 1175 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1176 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1177 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1178 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1179 array[ pcbddc->vertices[i] ] = one; 1180 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1181 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1182 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 1183 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1184 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1185 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1186 for(j=0;j<pcbddc->n_vertices;j++) { array2[j]=array[j]; } 1187 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1188 if(pcbddc->n_constraints) { 1189 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1190 for(j=0;j<pcbddc->n_constraints;j++) { array2[j+pcbddc->n_vertices]=array[j]; } 1191 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1192 } 1193 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1194 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1195 /* check saddle point solution */ 1196 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1197 ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1198 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1199 ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1200 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1201 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1202 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1203 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1204 } 1205 } 1206 1207 for(i=0;i<pcbddc->n_constraints;i++){ 1208 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1209 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1210 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1211 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1212 /* solution of saddle point problem */ 1213 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1214 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1215 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1216 if(pcbddc->n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1217 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1218 /* coarse basis functions */ 1219 index=i+pcbddc->n_vertices; 1220 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1221 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1224 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1225 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1226 if( pcbddc->prec_type || dbg_flag ) { 1227 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1230 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1231 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1232 } 1233 /* subdomain contribution to coarse matrix */ 1234 if(pcbddc->n_vertices) { 1235 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1236 for(j=0;j<pcbddc->n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1237 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1238 } 1239 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1240 for(j=0;j<pcbddc->n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j];} //WARNING -> column major ordering 1241 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1242 1243 if( dbg_flag ) { 1244 /* assemble subdomain vector on nodes */ 1245 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1246 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1247 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1248 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1249 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1250 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1251 /* assemble subdomain vector of lagrange multipliers */ 1252 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1253 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1254 if( pcbddc->n_vertices) { 1255 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1256 for(j=0;j<pcbddc->n_vertices;j++) {array2[j]=-array[j];} 1257 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1258 } 1259 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1260 for(j=0;j<pcbddc->n_constraints;j++) {array2[j+pcbddc->n_vertices]=-array[j];} 1261 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1262 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1263 /* check saddle point solution */ 1264 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1265 ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1266 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1267 ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1268 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1269 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1270 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1271 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1272 } 1273 } 1274 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1275 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1276 if( pcbddc->prec_type || dbg_flag ) { 1277 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1278 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1279 } 1280 /* Checking coarse_sub_mat and coarse basis functios */ 1281 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1282 //if(dbg_flag) { 1283 if(PETSC_FALSE) { /* waiting for patch on conversion from SEQDENSE to SEQAIJ with new nonzero location errors */ 1284 1285 Mat coarse_sub_mat; 1286 Mat TM1,TM2,TM3,TM4; 1287 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1288 const MatType checkmattype=MATSEQAIJ; 1289 PetscScalar value; 1290 1291 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1292 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1293 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1294 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1295 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1296 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1297 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1298 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1299 1300 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1301 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1302 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1303 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1304 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1305 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1306 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1307 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1308 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1309 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1310 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1311 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1312 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1313 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1314 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1315 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1317 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1318 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1320 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1321 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1322 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1323 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1324 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1325 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1326 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1327 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1328 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1329 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1330 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1331 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1332 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1333 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1334 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1335 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1336 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1337 } 1338 1339 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1340 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1341 /* free memory */ 1342 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1343 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1344 ierr = PetscFree(nnz);CHKERRQ(ierr); 1345 if(pcbddc->n_vertices) { 1346 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1347 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1348 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1349 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1350 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1351 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1352 } 1353 if(pcbddc->n_constraints) { 1354 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1355 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1356 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1357 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1358 } 1359 ierr = MatDestroy(&CMAT);CHKERRQ(ierr); 1360 } 1361 /* free memory */ 1362 if(pcbddc->n_vertices) { 1363 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1364 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1365 } 1366 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1367 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1368 1369 PetscFunctionReturn(0); 1370 } 1371 1372 /* -------------------------------------------------------------------------- */ 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1376 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1377 { 1378 1379 1380 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1381 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1382 PC_IS *pcis = (PC_IS*)pc->data; 1383 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1384 MPI_Comm coarse_comm; 1385 1386 /* common to all choiches */ 1387 PetscScalar *temp_coarse_mat_vals; 1388 PetscScalar *ins_coarse_mat_vals; 1389 PetscInt *ins_local_primal_indices; 1390 PetscMPIInt *localsizes2,*localdispl2; 1391 PetscMPIInt size_prec_comm; 1392 PetscMPIInt rank_prec_comm; 1393 PetscMPIInt active_rank=MPI_PROC_NULL; 1394 PetscMPIInt master_proc=0; 1395 PetscInt ins_local_primal_size; 1396 /* specific to MULTILEVEL_BDDC */ 1397 PetscMPIInt *ranks_recv; 1398 PetscMPIInt count_recv=0; 1399 PetscMPIInt rank_coarse_proc_send_to; 1400 PetscMPIInt coarse_color = MPI_UNDEFINED; 1401 ISLocalToGlobalMapping coarse_ISLG; 1402 /* some other variables */ 1403 PetscErrorCode ierr; 1404 const MatType coarse_mat_type; 1405 const PCType coarse_pc_type; 1406 const KSPType coarse_ksp_type; 1407 PC pc_temp; 1408 PetscInt i,j,k,bs; 1409 /* verbose output viewer */ 1410 PetscViewer viewer=pcbddc->dbg_viewer; 1411 PetscBool dbg_flag=pcbddc->dbg_flag; 1412 1413 PetscFunctionBegin; 1414 1415 ins_local_primal_indices = 0; 1416 ins_coarse_mat_vals = 0; 1417 localsizes2 = 0; 1418 localdispl2 = 0; 1419 temp_coarse_mat_vals = 0; 1420 coarse_ISLG = 0; 1421 1422 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1423 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1424 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1425 1426 /* Assign global numbering to coarse dofs */ 1427 { 1428 PetscScalar one=1.,zero=0.; 1429 PetscScalar *array; 1430 PetscMPIInt *auxlocal_primal; 1431 PetscMPIInt *auxglobal_primal; 1432 PetscMPIInt *all_auxglobal_primal; 1433 PetscMPIInt *all_auxglobal_primal_dummy; 1434 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1435 1436 /* Construct needed data structures for message passing */ 1437 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1438 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1439 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1440 /* Gather local_primal_size information for all processes */ 1441 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1442 pcbddc->replicated_primal_size = 0; 1443 for (i=0; i<size_prec_comm; i++) { 1444 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1445 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1446 } 1447 if(rank_prec_comm == 0) { 1448 /* allocate some auxiliary space */ 1449 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1450 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1451 } 1452 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1453 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1454 1455 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1456 This code fragment assumes that the number of local constraints per connected component 1457 is not greater than the number of nodes defined for the connected component 1458 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1459 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) */ 1460 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1461 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1462 for(i=0;i<pcbddc->n_vertices;i++) { 1463 array[ pcbddc->vertices[i] ] = one; 1464 auxlocal_primal[i] = pcbddc->vertices[i]; 1465 } 1466 for(i=0;i<pcbddc->n_constraints;i++) { 1467 for (j=0; j<pcbddc->sizes_of_constraint[i]; j++) { 1468 k = pcbddc->indices_to_constraint[i][j]; 1469 if( array[k] == zero ) { 1470 array[k] = one; 1471 auxlocal_primal[i+pcbddc->n_vertices] = k; 1472 break; 1473 } 1474 } 1475 } 1476 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1477 1478 /*ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1479 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1480 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1481 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1483 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1484 for(i=0;i<pcis->n;i++) { if(array[i] > 0.0) array[i] = one/array[i]; } 1485 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1486 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1487 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1488 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1489 1490 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1491 pcbddc->coarse_size = (PetscInt) coarsesum; 1492 if(dbg_flag) { 1493 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1494 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1495 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1496 }*/ 1497 1498 /* Now assign them a global numbering */ 1499 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1500 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1501 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1502 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1503 1504 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1505 /* It implements a function similar to PetscSortRemoveDupsInt */ 1506 if(rank_prec_comm==0) { 1507 /* dummy argument since PetscSortMPIInt doesn't exist! */ 1508 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1509 k=1; 1510 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1511 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1512 if(j != all_auxglobal_primal[i] ) { 1513 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1514 k++; 1515 j=all_auxglobal_primal[i]; 1516 } 1517 } 1518 } else { 1519 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1520 } 1521 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1522 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1523 1524 /* Now get global coarse numbering of local primal nodes */ 1525 for(i=0;i<pcbddc->local_primal_size;i++) { 1526 k=0; 1527 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1528 pcbddc->local_primal_indices[i]=k; 1529 } 1530 if(dbg_flag) { 1531 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1532 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1533 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1534 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1535 for(i=0;i<pcbddc->local_primal_size;i++) { 1536 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1537 } 1538 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1539 } 1540 /* free allocated memory */ 1541 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1542 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1543 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1544 if(rank_prec_comm == 0) { 1545 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1546 } 1547 } 1548 1549 /* adapt coarse problem type */ 1550 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1551 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1552 1553 switch(pcbddc->coarse_problem_type){ 1554 1555 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1556 { 1557 /* we need additional variables */ 1558 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1559 MetisInt *metis_coarse_subdivision; 1560 MetisInt options[METIS_NOPTIONS]; 1561 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1562 PetscMPIInt procs_jumps_coarse_comm; 1563 PetscMPIInt *coarse_subdivision; 1564 PetscMPIInt *total_count_recv; 1565 PetscMPIInt *total_ranks_recv; 1566 PetscMPIInt *displacements_recv; 1567 PetscMPIInt *my_faces_connectivity; 1568 PetscMPIInt *petsc_faces_adjncy; 1569 MetisInt *faces_adjncy; 1570 MetisInt *faces_xadj; 1571 PetscMPIInt *number_of_faces; 1572 PetscMPIInt *faces_displacements; 1573 PetscInt *array_int; 1574 PetscMPIInt my_faces=0; 1575 PetscMPIInt total_faces=0; 1576 PetscInt ranks_stretching_ratio; 1577 1578 /* define some quantities */ 1579 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1580 coarse_mat_type = MATIS; 1581 coarse_pc_type = PCBDDC; 1582 coarse_ksp_type = KSPRICHARDSON; 1583 1584 /* details of coarse decomposition */ 1585 n_subdomains = pcbddc->active_procs; 1586 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1587 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1588 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1589 1590 printf("Coarse algorithm details: \n"); 1591 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1592 1593 /* build CSR graph of subdomains' connectivity through faces */ 1594 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 1595 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 1596 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 1597 for(j=0;j<pcis->n_shared[i];j++){ 1598 array_int[ pcis->shared[i][j] ]+=1; 1599 } 1600 } 1601 for(i=1;i<pcis->n_neigh;i++){ 1602 for(j=0;j<pcis->n_shared[i];j++){ 1603 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1604 my_faces++; 1605 break; 1606 } 1607 } 1608 } 1609 //printf("I found %d faces.\n",my_faces); 1610 1611 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 1612 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 1613 my_faces=0; 1614 for(i=1;i<pcis->n_neigh;i++){ 1615 for(j=0;j<pcis->n_shared[i];j++){ 1616 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1617 my_faces_connectivity[my_faces]=pcis->neigh[i]; 1618 my_faces++; 1619 break; 1620 } 1621 } 1622 } 1623 if(rank_prec_comm == master_proc) { 1624 //printf("I found %d total faces.\n",total_faces); 1625 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 1626 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 1627 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 1628 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 1629 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 1630 } 1631 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1632 if(rank_prec_comm == master_proc) { 1633 faces_xadj[0]=0; 1634 faces_displacements[0]=0; 1635 j=0; 1636 for(i=1;i<size_prec_comm+1;i++) { 1637 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 1638 if(number_of_faces[i-1]) { 1639 j++; 1640 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 1641 } 1642 } 1643 printf("The J I count is %d and should be %d\n",j,n_subdomains); 1644 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 1645 } 1646 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1647 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 1648 ierr = PetscFree(array_int);CHKERRQ(ierr); 1649 if(rank_prec_comm == master_proc) { 1650 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 1651 printf("This is the face connectivity (actual ranks)\n"); 1652 for(i=0;i<n_subdomains;i++){ 1653 printf("proc %d is connected with \n",i); 1654 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 1655 printf("%d ",faces_adjncy[j]); 1656 printf("\n"); 1657 } 1658 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 1659 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 1660 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 1661 } 1662 1663 if( rank_prec_comm == master_proc ) { 1664 1665 PetscInt heuristic_for_metis=3; 1666 1667 ncon=1; 1668 faces_nvtxs=n_subdomains; 1669 /* partition graoh induced by face connectivity */ 1670 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 1671 ierr = METIS_SetDefaultOptions(options); 1672 /* we need a contiguous partition of the coarse mesh */ 1673 options[METIS_OPTION_CONTIG]=1; 1674 options[METIS_OPTION_DBGLVL]=1; 1675 options[METIS_OPTION_NITER]=30; 1676 //options[METIS_OPTION_NCUTS]=1; 1677 printf("METIS PART GRAPH\n"); 1678 if(n_subdomains>n_parts*heuristic_for_metis) { 1679 printf("Using Kway\n"); 1680 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 1681 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 1682 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 1683 } else { 1684 printf("Using Recursive\n"); 1685 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 1686 } 1687 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 1688 printf("Partition done!\n"); 1689 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 1690 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 1691 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 1692 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 1693 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 1694 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 1695 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 1696 } 1697 1698 /* Create new communicator for coarse problem splitting the old one */ 1699 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 1700 coarse_color=0; //for communicator splitting 1701 active_rank=rank_prec_comm; //for insertion of matrix values 1702 } 1703 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 1704 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 1705 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 1706 1707 if( coarse_color == 0 ) { 1708 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 1709 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 1710 printf("Details of coarse comm\n"); 1711 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 1712 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 1713 } else { 1714 rank_coarse_comm = MPI_PROC_NULL; 1715 } 1716 1717 /* master proc take care of arranging and distributing coarse informations */ 1718 if(rank_coarse_comm == master_proc) { 1719 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 1720 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 1721 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 1722 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 1723 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 1724 /* some initializations */ 1725 displacements_recv[0]=0; 1726 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 1727 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 1728 for(j=0;j<size_coarse_comm;j++) 1729 for(i=0;i<size_prec_comm;i++) 1730 if(coarse_subdivision[i]==j) 1731 total_count_recv[j]++; 1732 /* displacements needed for scatterv of total_ranks_recv */ 1733 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 1734 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 1735 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 1736 for(j=0;j<size_coarse_comm;j++) { 1737 for(i=0;i<size_prec_comm;i++) { 1738 if(coarse_subdivision[i]==j) { 1739 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 1740 total_count_recv[j]+=1; 1741 } 1742 } 1743 } 1744 for(j=0;j<size_coarse_comm;j++) { 1745 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 1746 for(i=0;i<total_count_recv[j];i++) { 1747 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 1748 } 1749 printf("\n"); 1750 } 1751 1752 /* identify new decomposition in terms of ranks in the old communicator */ 1753 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 1754 printf("coarse_subdivision in old end new ranks\n"); 1755 for(i=0;i<size_prec_comm;i++) 1756 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 1757 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 1758 } else { 1759 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 1760 } 1761 printf("\n"); 1762 } 1763 1764 /* Scatter new decomposition for send details */ 1765 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1766 /* Scatter receiving details to members of coarse decomposition */ 1767 if( coarse_color == 0) { 1768 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 1769 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 1770 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 1771 } 1772 1773 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 1774 //if(coarse_color == 0) { 1775 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 1776 // for(i=0;i<count_recv;i++) 1777 // printf("%d ",ranks_recv[i]); 1778 // printf("\n"); 1779 //} 1780 1781 if(rank_prec_comm == master_proc) { 1782 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 1783 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 1784 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 1785 free(coarse_subdivision); 1786 free(total_count_recv); 1787 free(total_ranks_recv); 1788 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 1789 } 1790 break; 1791 } 1792 1793 case(REPLICATED_BDDC): 1794 1795 pcbddc->coarse_communications_type = GATHERS_BDDC; 1796 coarse_mat_type = MATSEQAIJ; 1797 coarse_pc_type = PCLU; 1798 coarse_ksp_type = KSPPREONLY; 1799 coarse_comm = PETSC_COMM_SELF; 1800 active_rank = rank_prec_comm; 1801 break; 1802 1803 case(PARALLEL_BDDC): 1804 1805 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1806 coarse_mat_type = MATMPIAIJ; 1807 coarse_pc_type = PCREDUNDANT; 1808 coarse_ksp_type = KSPPREONLY; 1809 coarse_comm = prec_comm; 1810 active_rank = rank_prec_comm; 1811 break; 1812 1813 case(SEQUENTIAL_BDDC): 1814 pcbddc->coarse_communications_type = GATHERS_BDDC; 1815 coarse_mat_type = MATSEQAIJ; 1816 coarse_pc_type = PCLU; 1817 coarse_ksp_type = KSPPREONLY; 1818 coarse_comm = PETSC_COMM_SELF; 1819 active_rank = master_proc; 1820 break; 1821 } 1822 1823 switch(pcbddc->coarse_communications_type){ 1824 1825 case(SCATTERS_BDDC): 1826 { 1827 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 1828 1829 PetscMPIInt send_size; 1830 PetscInt *aux_ins_indices; 1831 PetscInt ii,jj; 1832 MPI_Request *requests; 1833 1834 /* allocate auxiliary space */ 1835 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 1836 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 1837 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 1838 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 1839 /* allocate stuffs for message massing */ 1840 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 1841 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 1842 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 1843 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 1844 /* fill up quantities */ 1845 j=0; 1846 for(i=0;i<count_recv;i++){ 1847 ii = ranks_recv[i]; 1848 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 1849 localdispl2[i]=j; 1850 j+=localsizes2[i]; 1851 jj = pcbddc->local_primal_displacements[ii]; 1852 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node 1853 } 1854 //printf("aux_ins_indices 1\n"); 1855 //for(i=0;i<pcbddc->coarse_size;i++) 1856 // printf("%d ",aux_ins_indices[i]); 1857 //printf("\n"); 1858 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 1859 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 1860 /* evaluate how many values I will insert in coarse mat */ 1861 ins_local_primal_size=0; 1862 for(i=0;i<pcbddc->coarse_size;i++) 1863 if(aux_ins_indices[i]) 1864 ins_local_primal_size++; 1865 /* evaluate indices I will insert in coarse mat */ 1866 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 1867 j=0; 1868 for(i=0;i<pcbddc->coarse_size;i++) 1869 if(aux_ins_indices[i]) 1870 ins_local_primal_indices[j++]=i; 1871 /* use aux_ins_indices to realize a global to local mapping */ 1872 j=0; 1873 for(i=0;i<pcbddc->coarse_size;i++){ 1874 if(aux_ins_indices[i]==0){ 1875 aux_ins_indices[i]=-1; 1876 } else { 1877 aux_ins_indices[i]=j; 1878 j++; 1879 } 1880 } 1881 1882 //printf("New details localsizes2 localdispl2\n"); 1883 //for(i=0;i<count_recv;i++) 1884 // printf("(%d %d) ",localsizes2[i],localdispl2[i]); 1885 //printf("\n"); 1886 //printf("aux_ins_indices 2\n"); 1887 //for(i=0;i<pcbddc->coarse_size;i++) 1888 // printf("%d ",aux_ins_indices[i]); 1889 //printf("\n"); 1890 //printf("ins_local_primal_indices\n"); 1891 //for(i=0;i<ins_local_primal_size;i++) 1892 // printf("%d ",ins_local_primal_indices[i]); 1893 //printf("\n"); 1894 //printf("coarse_submat_vals\n"); 1895 //for(i=0;i<pcbddc->local_primal_size;i++) 1896 // for(j=0;j<pcbddc->local_primal_size;j++) 1897 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 1898 //printf("\n"); 1899 1900 /* processes partecipating in coarse problem receive matrix data from their friends */ 1901 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 1902 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 1903 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 1904 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 1905 } 1906 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1907 1908 //if(coarse_color == 0) { 1909 // printf("temp_coarse_mat_vals\n"); 1910 // for(k=0;k<count_recv;k++){ 1911 // printf("---- %d ----\n",ranks_recv[k]); 1912 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 1913 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 1914 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 1915 // printf("\n"); 1916 // } 1917 //} 1918 /* calculate data to insert in coarse mat */ 1919 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 1920 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 1921 1922 PetscMPIInt rr,kk,lps,lpd; 1923 PetscInt row_ind,col_ind; 1924 for(k=0;k<count_recv;k++){ 1925 rr = ranks_recv[k]; 1926 kk = localdispl2[k]; 1927 lps = pcbddc->local_primal_sizes[rr]; 1928 lpd = pcbddc->local_primal_displacements[rr]; 1929 //printf("Inserting the following indices (received from %d)\n",rr); 1930 for(j=0;j<lps;j++){ 1931 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 1932 for(i=0;i<lps;i++){ 1933 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 1934 //printf("%d %d\n",row_ind,col_ind); 1935 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 1936 } 1937 } 1938 } 1939 ierr = PetscFree(requests);CHKERRQ(ierr); 1940 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 1941 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 1942 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 1943 1944 /* create local to global mapping needed by coarse MATIS */ 1945 { 1946 IS coarse_IS; 1947 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 1948 coarse_comm = prec_comm; 1949 active_rank=rank_prec_comm; 1950 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 1951 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 1952 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 1953 } 1954 } 1955 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 1956 /* arrays for values insertion */ 1957 ins_local_primal_size = pcbddc->local_primal_size; 1958 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 1959 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 1960 for(j=0;j<ins_local_primal_size;j++){ 1961 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 1962 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 1963 } 1964 } 1965 break; 1966 1967 } 1968 1969 case(GATHERS_BDDC): 1970 { 1971 1972 PetscMPIInt mysize,mysize2; 1973 1974 if(rank_prec_comm==active_rank) { 1975 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 1976 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 1977 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 1978 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 1979 /* arrays for values insertion */ 1980 ins_local_primal_size = pcbddc->coarse_size; 1981 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 1982 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 1983 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 1984 localdispl2[0]=0; 1985 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 1986 j=0; 1987 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 1988 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 1989 } 1990 1991 mysize=pcbddc->local_primal_size; 1992 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 1993 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 1994 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1995 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 1996 } else { 1997 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 1998 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 1999 } 2000 2001 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2002 if(rank_prec_comm==active_rank) { 2003 PetscInt offset,offset2,row_ind,col_ind; 2004 for(j=0;j<ins_local_primal_size;j++){ 2005 ins_local_primal_indices[j]=j; 2006 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2007 } 2008 for(k=0;k<size_prec_comm;k++){ 2009 offset=pcbddc->local_primal_displacements[k]; 2010 offset2=localdispl2[k]; 2011 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2012 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2013 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2014 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2015 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2016 } 2017 } 2018 } 2019 } 2020 break; 2021 }//switch on coarse problem and communications associated with finished 2022 } 2023 2024 /* Now create and fill up coarse matrix */ 2025 if( rank_prec_comm == active_rank ) { 2026 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2027 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2028 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2029 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2030 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2031 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2032 } else { 2033 Mat matis_coarse_local_mat; 2034 ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2035 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2036 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2037 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2038 } 2039 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2040 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2041 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2042 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2043 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2044 Mat matis_coarse_local_mat; 2045 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2046 ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr); 2047 } 2048 2049 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2050 /* Preconditioner for coarse problem */ 2051 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2052 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2053 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2054 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2055 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2056 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2057 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2058 /* Allow user's customization */ 2059 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2060 /* Set Up PC for coarse problem BDDC */ 2061 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2062 if(dbg_flag) { 2063 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2064 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2065 } 2066 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2067 } 2068 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2069 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2070 if(dbg_flag) { 2071 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2072 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2073 } 2074 } 2075 } 2076 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2077 IS local_IS,global_IS; 2078 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2079 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2080 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2081 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2082 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2083 } 2084 2085 2086 /* Check condition number of coarse problem */ 2087 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && dbg_flag && rank_prec_comm == active_rank ) { 2088 PetscScalar m_one=-1.0; 2089 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2090 2091 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2092 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2093 ierr = KSPSetType(pcbddc->coarse_ksp,KSPGMRES);CHKERRQ(ierr); 2094 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2095 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2096 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2097 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2098 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2099 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2100 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2101 kappa_2=lambda_max/lambda_min; 2102 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2103 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2104 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2105 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of gmres is: % 1.14e\n",k,kappa_2);CHKERRQ(ierr); 2106 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2107 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2108 /* restore coarse ksp to default values */ 2109 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2110 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2111 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2112 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2113 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2114 } 2115 2116 /* free data structures no longer needed */ 2117 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2118 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2119 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2120 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2121 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2122 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2123 2124 PetscFunctionReturn(0); 2125 } 2126 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2129 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2130 { 2131 2132 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2133 PC_IS *pcis = (PC_IS*)pc->data; 2134 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2135 PetscInt **neighbours_set; 2136 PetscInt bs,ierr,i,j,s,k,iindex; 2137 PetscInt total_counts; 2138 PetscBool flg_row; 2139 PCBDDCGraph mat_graph; 2140 PetscInt neumann_bsize; 2141 const PetscInt* neumann_nodes; 2142 Mat mat_adj; 2143 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE; 2144 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2145 PetscInt* queue_in_global_numbering; 2146 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2147 2148 PetscFunctionBegin; 2149 2150 /* allocate and initialize needed graph structure */ 2151 ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2152 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2153 /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */ 2154 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2155 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2156 /* ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->where);CHKERRQ(ierr); 2157 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->count);CHKERRQ(ierr); 2158 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->which_dof);CHKERRQ(ierr); 2159 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&mat_graph->queue);CHKERRQ(ierr); 2160 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->cptr);CHKERRQ(ierr); 2161 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 2162 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscBool),&mat_graph->touched);CHKERRQ(ierr); */ 2163 i = mat_graph->nvtxs; 2164 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2165 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2166 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2167 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2168 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2169 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2170 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2171 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2172 2173 /* TODO: IS for dof splitting */ 2174 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2175 for(i=0;i<mat_graph->nvtxs/bs;i++) { 2176 for(s=0;s<bs;s++) { 2177 mat_graph->which_dof[i*bs+s]=s; 2178 } 2179 } 2180 2181 total_counts=0; 2182 for(i=0;i<pcis->n_neigh;i++){ 2183 s=pcis->n_shared[i]; 2184 total_counts+=s; 2185 for(j=0;j<s;j++){ 2186 mat_graph->count[pcis->shared[i][j]] += 1; 2187 } 2188 } 2189 /* Take into account Neumann data incrementing number of sharing subdomains for all but faces nodes lying on the interface */ 2190 if(pcbddc->NeumannBoundaries) { 2191 ierr = ISGetLocalSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2192 ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2193 for(i=0;i<neumann_bsize;i++){ 2194 iindex = neumann_nodes[i]; 2195 if(mat_graph->count[iindex] > 2){ 2196 mat_graph->count[iindex]+=1; 2197 total_counts++; 2198 } 2199 } 2200 } 2201 2202 /* allocate space for storing neighbours' set */ 2203 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2204 if(mat_graph->nvtxs) ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); 2205 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2206 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2207 for(i=0;i<pcis->n_neigh;i++){ 2208 s=pcis->n_shared[i]; 2209 for(j=0;j<s;j++) { 2210 k=pcis->shared[i][j]; 2211 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2212 mat_graph->count[k]+=1; 2213 } 2214 } 2215 /* set -1 fake neighbour */ 2216 if(pcbddc->NeumannBoundaries) { 2217 for(i=0;i<neumann_bsize;i++){ 2218 iindex = neumann_nodes[i]; 2219 if( mat_graph->count[iindex] > 2){ 2220 neighbours_set[iindex][mat_graph->count[iindex]] = -1; /* An additional fake neighbour (with rank -1) */ 2221 mat_graph->count[iindex]+=1; 2222 } 2223 } 2224 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2225 } 2226 /* sort set of sharing subdomains for comparison */ 2227 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2228 /* start analyzing local interface */ 2229 PetscInt nodes_touched=0; 2230 for(i=0;i<mat_graph->nvtxs;i++){ 2231 if(!mat_graph->count[i]){ /* internal nodes */ 2232 mat_graph->touched[i]=PETSC_TRUE; 2233 mat_graph->where[i]=0; 2234 nodes_touched++; 2235 } 2236 } 2237 PetscInt where_values=1; 2238 PetscBool same_set; 2239 mat_graph->ncmps = 0; 2240 while(nodes_touched<mat_graph->nvtxs) { 2241 /* find first untouched node in local ordering */ 2242 i=0; 2243 while(mat_graph->touched[i]) i++; 2244 mat_graph->touched[i]=PETSC_TRUE; 2245 mat_graph->where[i]=where_values; 2246 nodes_touched++; 2247 /* now find all other nodes having the same set of sharing subdomains */ 2248 for(j=i+1;j<mat_graph->nvtxs;j++){ 2249 /* check for same number of sharing subdomains and dof number */ 2250 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2251 /* check for same set of sharing subdomains */ 2252 same_set=PETSC_TRUE; 2253 for(k=0;k<mat_graph->count[j];k++){ 2254 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2255 same_set=PETSC_FALSE; 2256 } 2257 } 2258 /* I found a friend of mine */ 2259 if(same_set) { 2260 mat_graph->where[j]=where_values; 2261 mat_graph->touched[j]=PETSC_TRUE; 2262 nodes_touched++; 2263 } 2264 } 2265 } 2266 where_values++; 2267 } 2268 where_values--; if(where_values<0) where_values=0; 2269 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2270 /* Find connected components defined on the shared interface */ 2271 if(where_values) { 2272 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2273 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component contained in the global queue (and carring together the local queue) */ 2274 for(i=0;i<mat_graph->ncmps;i++) { 2275 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2276 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2277 } 2278 /* for(i=0;i<mat_graph->ncmps;i++) { 2279 printf("[queue num %d] ptr %d, length %d: global node (local node)\n",i,mat_graph->cptr[i],mat_graph->cptr[i+1]-mat_graph->cptr[i]); 2280 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++){ 2281 printf("%d (%d) ",queue_in_global_numbering[mat_graph->cptr[i]+j],mat_graph->queue[mat_graph->cptr[i]+j]); 2282 } 2283 printf("\n"); 2284 } */ 2285 } 2286 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 2287 for(i=0;i<where_values;i++) { 2288 if(mat_graph->where_ncmps[i]>1) { /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 2289 adapt_interface=1; 2290 break; 2291 } 2292 } 2293 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 2294 if(where_values && adapt_interface_reduced) { 2295 2296 PetscInt sum_requests=0,my_rank; 2297 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 2298 PetscInt temp_buffer_size,ins_val,global_where_counter; 2299 PetscInt *cum_recv_counts; 2300 PetscInt *where_to_nodes_indices; 2301 PetscInt *petsc_buffer; 2302 PetscMPIInt *recv_buffer; 2303 PetscMPIInt *recv_buffer_where; 2304 PetscMPIInt *send_buffer; 2305 PetscMPIInt size_of_send; 2306 PetscInt *sizes_of_sends; 2307 MPI_Request *send_requests; 2308 MPI_Request *recv_requests; 2309 PetscInt *where_cc_adapt; 2310 PetscInt **temp_buffer; 2311 PetscInt *nodes_to_temp_buffer_indices; 2312 PetscInt *add_to_where; 2313 2314 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 2315 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 2316 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 2317 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 2318 /* first count how many neighbours per connected component I will receive from */ 2319 cum_recv_counts[0]=0; 2320 for(i=1;i<where_values+1;i++){ 2321 j=0; 2322 while(mat_graph->where[j] != i) j++; 2323 where_to_nodes_indices[i-1]=j; 2324 if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 2325 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-2; } 2326 } 2327 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 2328 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2329 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2330 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2331 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2332 for(i=0;i<cum_recv_counts[where_values];i++) { 2333 send_requests[i]=MPI_REQUEST_NULL; 2334 recv_requests[i]=MPI_REQUEST_NULL; 2335 } 2336 /* printf("MPI sizes: buffer size for send %d, number of requests %d\n",buffer_size,cum_recv_counts[where_values]); */ 2337 2338 /* exchange with my neighbours the number of my connected components on the shared interface */ 2339 for(i=0;i<where_values;i++){ 2340 j=where_to_nodes_indices[i]; 2341 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2342 for(;k<mat_graph->count[j];k++){ 2343 if(neighbours_set[j][k] != my_rank) { 2344 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2345 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2346 /*printf("SENDREQ[%d]: to %d (tag %d) the value %d\n",sum_requests,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],mat_graph->where_ncmps[i]); 2347 printf("RECVREQ[%d]: from %d (tag %d) at buffer address %d\n",sum_requests,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],sum_requests);*/ 2348 sum_requests++; 2349 } 2350 } 2351 } 2352 /* printf("Final number of request: s=r=%d (should be equal to %d)\n",sum_requests,cum_recv_counts[where_values]); */ 2353 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2354 /*for(i=0;i<where_values;i++){ 2355 printf("my where_ncmps[%d]=%d\n",i,mat_graph->where_ncmps[i]); 2356 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2357 printf(" recv where_ncmps[%d]=%d\n",j,recv_buffer_where[j]); 2358 } 2359 }*/ 2360 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2361 2362 /* determine the connected component I need to adapt */ 2363 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2364 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2365 for(i=0;i<where_values;i++){ 2366 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2367 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2368 where_cc_adapt[i]=PETSC_TRUE; 2369 break; 2370 } 2371 } 2372 } 2373 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2374 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2375 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2376 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2377 sum_requests=0; 2378 start_of_send=0; 2379 start_of_recv=cum_recv_counts[where_values]; 2380 for(i=0;i<where_values;i++) { 2381 if(where_cc_adapt[i]) { 2382 size_of_send=0; 2383 for(j=i;j<mat_graph->ncmps;j++) { 2384 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 2385 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2386 size_of_send+=1; 2387 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2388 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2389 } 2390 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2391 } 2392 } 2393 j = where_to_nodes_indices[i]; 2394 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2395 for(;k<mat_graph->count[j];k++){ 2396 if(neighbours_set[j][k] != my_rank) { 2397 /* printf("SENDREQ[%d]: to %d (tag %d) the value %d\n",sum_requests,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],size_of_send); 2398 printf("RECVREQ[%d]: from %d (tag %d) at buffer address %d\n",sum_requests,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],sum_requests+start_of_recv);*/ 2399 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2400 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2401 sum_requests++; 2402 } 2403 } 2404 sizes_of_sends[i]=size_of_send; 2405 start_of_send+=size_of_send; 2406 } 2407 } 2408 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2409 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2410 /*j=0; 2411 for(k=0;k<where_values;k++) { j+=sizes_of_sends[k]; } 2412 printf("This is the send buffer (size %d): \n",j); 2413 for(k=0;k<j;k++) { printf("%d ",send_buffer[k]); } 2414 printf("\n");*/ 2415 buffer_size=0; 2416 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2417 /* printf("Allocating recv buffer of size %d\n",buffer_size); */ 2418 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2419 /* now exchange the data */ 2420 start_of_recv=0; 2421 start_of_send=0; 2422 sum_requests=0; 2423 for(i=0;i<where_values;i++) { 2424 if(where_cc_adapt[i]) { 2425 size_of_send = sizes_of_sends[i]; 2426 j = where_to_nodes_indices[i]; 2427 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2428 for(;k<mat_graph->count[j];k++){ 2429 if(neighbours_set[j][k] != my_rank) { 2430 /* printf("SENDREQ[%d]: to %d (tag %d) with size %d at buffer address %d\n",sum_requests,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],size_of_send,start_of_send); 2431 printf("RECVREQ[%d]: from %d (tag %d) at buffer address %d\n",sum_requests,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],start_of_recv); */ 2432 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2433 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2434 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2435 start_of_recv+=size_of_recv; 2436 sum_requests++; 2437 } 2438 } 2439 start_of_send+=size_of_send; 2440 } 2441 } 2442 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2443 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2444 /*printf("This is what I received (size %d): \n",start_of_recv); 2445 for(k=0;k<start_of_recv;k++) { printf("%d ",recv_buffer[k]); } 2446 printf("\n");*/ 2447 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2448 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2449 for(j=0;j<buffer_size;) { 2450 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2451 k=petsc_buffer[j]+1; 2452 j+=k; 2453 } 2454 /*printf("This is what I received in local numbering (unrolled): \n"); 2455 i=0; 2456 for(j=0;j<buffer_size;) { 2457 printf("queue num %d (j=%d)\n",i,j); 2458 for(k=1;k<petsc_buffer[j]+1;k++) { printf("%d ",petsc_buffer[k+j]); } 2459 printf("\n"); 2460 i++;j+=k; 2461 }*/ 2462 sum_requests=cum_recv_counts[where_values]; 2463 start_of_recv=0; 2464 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2465 global_where_counter=0; 2466 for(i=0;i<where_values;i++){ 2467 if(where_cc_adapt[i]){ 2468 temp_buffer_size=0; 2469 /* find nodes on the shared interface we need to adapt */ 2470 for(j=0;j<mat_graph->nvtxs;j++){ 2471 if(mat_graph->where[j]==i+1) { 2472 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2473 temp_buffer_size++; 2474 } else { 2475 nodes_to_temp_buffer_indices[j]=-1; 2476 } 2477 } 2478 /* allocate some temporary space */ 2479 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2480 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2481 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2482 for(j=1;j<temp_buffer_size;j++){ 2483 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2484 } 2485 /* analyze contributions from neighbouring subdomains for i-th conn comp 2486 temp buffer structure: 2487 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2488 3 neighs procs with structured connected components: 2489 neigh 0: [0 1 4], [2 3]; (2 connected components) 2490 neigh 1: [0 1], [2 3 4]; (2 connected components) 2491 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2492 tempbuffer (row-oriented) should be filled as: 2493 [ 0, 0, 0; 2494 0, 0, 1; 2495 1, 1, 2; 2496 1, 1, 2; 2497 0, 1, 0; ]; 2498 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 2499 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2500 */ 2501 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2502 ins_val=0; 2503 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2504 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2505 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2506 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2507 } 2508 buffer_size+=k; 2509 ins_val++; 2510 } 2511 start_of_recv+=size_of_recv; 2512 sum_requests++; 2513 } 2514 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2515 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2516 for(j=0;j<temp_buffer_size;j++){ 2517 if(!add_to_where[j]){ /* found a new cc */ 2518 global_where_counter++; 2519 add_to_where[j]=global_where_counter; 2520 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2521 same_set=PETSC_TRUE; 2522 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2523 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2524 same_set=PETSC_FALSE; 2525 break; 2526 } 2527 } 2528 if(same_set) add_to_where[k]=global_where_counter; 2529 } 2530 } 2531 } 2532 /* insert new data in where array */ 2533 temp_buffer_size=0; 2534 for(j=0;j<mat_graph->nvtxs;j++){ 2535 if(mat_graph->where[j]==i+1) { 2536 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2537 temp_buffer_size++; 2538 } 2539 } 2540 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2541 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2542 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2543 } 2544 } 2545 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2546 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 2547 ierr = PetscFree(send_requests);CHKERRQ(ierr); 2548 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 2549 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 2550 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 2551 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 2552 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2553 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 2554 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 2555 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 2556 if(global_where_counter) { 2557 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 2558 global_where_counter=0; 2559 for(i=0;i<mat_graph->nvtxs;i++){ 2560 if(mat_graph->where[i] && !mat_graph->touched[i]) { 2561 global_where_counter++; 2562 for(j=i+1;j<mat_graph->nvtxs;j++){ 2563 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 2564 mat_graph->where[j]=global_where_counter; 2565 mat_graph->touched[j]=PETSC_TRUE; 2566 } 2567 } 2568 mat_graph->where[i]=global_where_counter; 2569 mat_graph->touched[i]=PETSC_TRUE; 2570 } 2571 } 2572 where_values=global_where_counter; 2573 } 2574 if(global_where_counter) { 2575 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2576 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2577 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2578 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2579 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2580 for(i=0;i<mat_graph->ncmps;i++) { 2581 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2582 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2583 } 2584 } 2585 } 2586 2587 /* count vertices, edges and faces */ 2588 PetscInt nfc=0; 2589 PetscInt nec=0; 2590 PetscInt nvc=0; 2591 for (i=0; i<mat_graph->ncmps; i++) { 2592 if( !pcbddc->vertices_flag ) { 2593 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2594 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==2){ 2595 nfc++; 2596 } else { 2597 nec++; 2598 } 2599 } 2600 } 2601 if( !pcbddc->constraints_flag ){ 2602 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1 ){ 2603 nvc++; 2604 } 2605 } 2606 } 2607 2608 pcbddc->n_constraints = nec+nfc; 2609 pcbddc->n_vertices = nvc; 2610 2611 if(pcbddc->n_constraints){ 2612 /* allocate space for local constraints of BDDC */ 2613 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscInt*),&pcbddc->indices_to_constraint);CHKERRQ(ierr); 2614 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscScalar*),&pcbddc->quadrature_constraint);CHKERRQ(ierr); 2615 ierr = PetscMalloc (pcbddc->n_constraints*sizeof(PetscInt),&pcbddc->sizes_of_constraint);CHKERRQ(ierr); 2616 k=0; 2617 for (i=0; i<mat_graph->ncmps; i++) { 2618 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2619 pcbddc->sizes_of_constraint[k] = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2620 k++; 2621 } 2622 } 2623 k=0; 2624 for (i=0; i<pcbddc->n_constraints; i++) k+=pcbddc->sizes_of_constraint[i]; 2625 ierr = PetscMalloc (k*sizeof(PetscInt),&pcbddc->indices_to_constraint[0]);CHKERRQ(ierr); 2626 ierr = PetscMalloc (k*sizeof(PetscScalar),&pcbddc->quadrature_constraint[0]);CHKERRQ(ierr); 2627 for (i=1; i<pcbddc->n_constraints; i++) { 2628 pcbddc->indices_to_constraint[i] = pcbddc->indices_to_constraint[i-1] + pcbddc->sizes_of_constraint[i-1]; 2629 pcbddc->quadrature_constraint[i] = pcbddc->quadrature_constraint[i-1] + pcbddc->sizes_of_constraint[i-1]; 2630 } 2631 k=0; 2632 PetscScalar quad_value; 2633 for (i=0; i<mat_graph->ncmps; i++) { 2634 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1 ){ 2635 quad_value=1.0/( (PetscReal) (mat_graph->cptr[i+1]-mat_graph->cptr[i]) ); 2636 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2637 pcbddc->indices_to_constraint[k][j] = mat_graph->queue[mat_graph->cptr[i]+j]; 2638 pcbddc->quadrature_constraint[k][j] = quad_value; 2639 } 2640 k++; 2641 } 2642 } 2643 } 2644 if(pcbddc->n_vertices){ 2645 /* allocate space for local vertices of BDDC */ 2646 ierr = PetscMalloc (pcbddc->n_vertices*sizeof(PetscInt),&pcbddc->vertices);CHKERRQ(ierr); 2647 k=0; 2648 for (i=0; i<mat_graph->ncmps; i++) { 2649 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1 ){ 2650 pcbddc->vertices[k] = mat_graph->queue[mat_graph->cptr[i]]; 2651 k++; 2652 } 2653 } 2654 /* sort vertex set (by local ordering) */ 2655 ierr = PetscSortInt(pcbddc->n_vertices,pcbddc->vertices);CHKERRQ(ierr); 2656 } 2657 2658 if(pcbddc->dbg_flag) { 2659 PetscViewer viewer=pcbddc->dbg_viewer; 2660 2661 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2662 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2663 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2664 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 2665 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2666 for(i=0;i<mat_graph->nvtxs;i++) { 2667 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 2668 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 2669 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 2670 } 2671 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 2672 }*/ 2673 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 2674 for(i=0;i<mat_graph->ncmps;i++) { 2675 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nSize and count for connected component %02d : %04d %01d\n", i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 2676 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 2677 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); 2678 } 2679 } 2680 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 2681 if( pcbddc->n_vertices ) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,pcbddc->n_vertices);CHKERRQ(ierr); } 2682 if( nfc ) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); } 2683 if( nec ) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); } 2684 if( pcbddc->n_vertices ) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Global indices for subdomain vertices follows\n",PetscGlobalRank,pcbddc->n_vertices);CHKERRQ(ierr); } 2685 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->n_vertices,pcbddc->vertices,queue_in_global_numbering);CHKERRQ(ierr); 2686 for(i=0;i<pcbddc->n_vertices;i++){ ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",queue_in_global_numbering[i]);CHKERRQ(ierr); } 2687 if( pcbddc->n_vertices ) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); } 2688 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2689 } 2690 2691 /* Restore CSR structure into sequantial matrix and free memory space no longer needed */ 2692 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2693 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2694 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2695 /* Free graph structure */ 2696 if(mat_graph->nvtxs){ 2697 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 2698 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 2699 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 2700 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 2701 /* ierr = PetscFree(mat_graph->where);CHKERRQ(ierr); 2702 ierr = PetscFree(mat_graph->touched);CHKERRQ(ierr); 2703 ierr = PetscFree(mat_graph->which_dof);CHKERRQ(ierr); 2704 ierr = PetscFree(mat_graph->queue);CHKERRQ(ierr); 2705 ierr = PetscFree(mat_graph->cptr);CHKERRQ(ierr); 2706 ierr = PetscFree(mat_graph->count);CHKERRQ(ierr); 2707 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr);*/ 2708 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2709 } 2710 ierr = PetscFree(mat_graph);CHKERRQ(ierr); 2711 2712 PetscFunctionReturn(0); 2713 2714 } 2715 2716 /* -------------------------------------------------------------------------- */ 2717 2718 /* The following code has been adapted from function IsConnectedSubdomain contained 2719 in source file contig.c of METIS library (version 5.0.1) */ 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "PCBDDCFindConnectedComponents" 2723 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )//, PetscInt *dist_vals) 2724 { 2725 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 2726 PetscInt *xadj, *adjncy, *where, *queue; 2727 PetscInt *cptr; 2728 PetscBool *touched; 2729 2730 PetscFunctionBegin; 2731 2732 nvtxs = graph->nvtxs; 2733 xadj = graph->xadj; 2734 adjncy = graph->adjncy; 2735 where = graph->where; 2736 touched = graph->touched; 2737 queue = graph->queue; 2738 cptr = graph->cptr; 2739 2740 for (i=0; i<nvtxs; i++) 2741 touched[i] = PETSC_FALSE; 2742 2743 cum_queue=0; 2744 ncmps=0; 2745 2746 for(n=0; n<n_dist; n++) { 2747 //pid = dist_vals[n]; 2748 pid = n+1; 2749 nleft = 0; 2750 for (i=0; i<nvtxs; i++) { 2751 if (where[i] == pid) 2752 nleft++; 2753 } 2754 for (i=0; i<nvtxs; i++) { 2755 if (where[i] == pid) 2756 break; 2757 } 2758 touched[i] = PETSC_TRUE; 2759 queue[cum_queue] = i; 2760 first = 0; last = 1; 2761 cptr[ncmps] = cum_queue; /* This actually points to queue */ 2762 ncmps_pid = 0; 2763 while (first != nleft) { 2764 if (first == last) { /* Find another starting vertex */ 2765 cptr[++ncmps] = first+cum_queue; 2766 ncmps_pid++; 2767 for (i=0; i<nvtxs; i++) { 2768 if (where[i] == pid && !touched[i]) 2769 break; 2770 } 2771 queue[cum_queue+last] = i; 2772 last++; 2773 touched[i] = PETSC_TRUE; 2774 } 2775 i = queue[cum_queue+first]; 2776 first++; 2777 for (j=xadj[i]; j<xadj[i+1]; j++) { 2778 k = adjncy[j]; 2779 if (where[k] == pid && !touched[k]) { 2780 queue[cum_queue+last] = k; 2781 last++; 2782 touched[k] = PETSC_TRUE; 2783 } 2784 } 2785 } 2786 cptr[++ncmps] = first+cum_queue; 2787 ncmps_pid++; 2788 cum_queue=cptr[ncmps]; 2789 graph->where_ncmps[n] = ncmps_pid; 2790 } 2791 graph->ncmps = ncmps; 2792 2793 PetscFunctionReturn(0); 2794 } 2795 2796